import csv
import itertools
import xlsxwriter
import errno
import os
import logging
import faker
from random import sample, choice
import baangt.base.GlobalConstants as GC
import re
import sys
import pandas as pd
from CloneXls import CloneXls
import json

logger = logging.getLogger("pyC")


class PrefixData:
    def __init__(self, dataList, prefix, tdg_object=None):
        self.dataList = dataList
        self.prefix = prefix
        self.tdg_object = tdg_object
        self.process()

    def process(self):
        # rrd/rre data: drop entries whose usecount limit is already exhausted
        if self.prefix.lower() == "rrd" or self.prefix.lower() == "rre":
            self.dataList = [
                data for data in self.dataList
                if not self.tdg_object.usecount_dict[repr(data)]["limit"]
                or self.tdg_object.usecount_dict[repr(data)]["use"] < self.tdg_object.usecount_dict[repr(data)]["limit"]
            ]
        # fkr data: dataList is [<faker type>, <locale>, <count>]; pre-generate the fake values
        elif self.prefix.lower() == "fkr":
            fake = faker.Faker(self.dataList[1])
            fake_lis = []
            if len(self.dataList) == 3:
                if int(self.dataList[2]) == 0:
                    # count 0 means: a single value, re-used for every generated row
                    fake_lis.append(getattr(fake, self.dataList[0])())
                else:
                    for x in range(int(self.dataList[2])):
                        fake_lis.append(getattr(fake, self.dataList[0])())
            else:
                # no count given, default to 5 fake values
                for x in range(5):
                    fake_lis.append(getattr(fake, self.dataList[0])())
            self.dataList = fake_lis
    def return_random(self):
        if self.prefix == "rre" or self.prefix == "rrd":
            if not len(self.dataList):
                raise BaseException("Not enough data, please verify if data is present or the "
                                    "usecount limit has been reached!")
            data = choice(self.dataList)
            self.tdg_object.usecount_dict[repr(data)]["use"] += 1
            # once the limit is hit, remove the entry so it cannot be picked again
            if self.tdg_object.usecount_dict[repr(data)]["limit"] and \
                    self.tdg_object.usecount_dict[repr(data)]["use"] >= self.tdg_object.usecount_dict[repr(data)]["limit"]:
                self.dataList.remove(data)
            return data
        elif self.prefix.lower() == "fkr":
            return choice(self.dataList)
        elif self.prefix == "rnd":
            return choice(self.dataList)
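
# A minimal sketch of how PrefixData is used (values are illustrative, not from a
# real sheet): the "fkr" prefix pre-generates Faker data, return_random() picks one.
#   pd_obj = PrefixData(["name", "en_US", "3"], prefix="fkr")
#   pd_obj.return_random()  # -> one of the three pre-generated Faker names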


class TestDataGenerator:
    """
    TestDataGenerator class is used to create a TestData file from a raw excel file containing all possible values.

    Formats accepted in the input excel file:
        1. Value = ``<value>``
        2. List of values = ``[<value1>,<value2>]``
        3. Range = ``<start>-<end>,<step>``
        4. Random = ``RND_[list]``
        5. Random from range = ``RND_<start>-<end>,<step>``
        6. List of headers = ``[<title1>, <title2>, <title3>]``
        7. Faker prefix = ``FKR_(<type>, <locale>, <number_of_data>)``
        8. RRD prefix = ``RRD_(<sheetName>,<TargetData>,[<Header1>:[<Value1>],<Header2>:[<Value1>,<Value2>]])``

    :param rawExcelPath: Input path of the xlsx file containing the raw data.
    :param sheetName: Name of the sheet where the base data is located.
    :method write: Writes the final processed data into an excel/csv file.
    """

    def __init__(self, rawExcelPath=GC.TESTDATAGENERATOR_INPUTFILE, sheetName="",
                 from_handleDatabase=False, noUpdate=True):
        self.path = os.path.abspath(rawExcelPath)
        self.sheet_name = sheetName
        if not os.path.isfile(self.path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.path)
        self.sheet_dict, self.raw_data_json = self.read_excel(self.path, self.sheet_name)
        self.rre_sheets = {}
        self.isUsecount = {}
        self.remove_header = []
        self.usecount_dict = {}  # tracks the usecount limit per record and ensures no data exceeds its limit
        self.done = {}
        self.noUpdateFiles = noUpdate
        self.writers = {}
        if not from_handleDatabase:
            self.processed_datas = self.__process_data(self.raw_data_json)
            self.final_data = self.__generateFinalData(self.processed_datas)
            if self.isUsecount:
                if not self.noUpdateFiles:
                    self.save_usecount()  # save the source input file once everything is done

    def write(self, OutputFormat=GC.TESTDATAGENERATOR_OUTPUT_FORMAT, batch_size=0, outputfile=None):
        """
        Writes the generated data into the output file.

        :param OutputFormat: "xlsx" or "csv"
        :param batch_size: Number of rows to be written into the output file. Rows are selected randomly.
        :param outputfile: Name and path of the output file.
        :return:
        """
        if OutputFormat.lower() == "xlsx":
            if outputfile is None:
                outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX
            # self.__write_excel(batch_size=batch_size, outputfile=outputfile)
            with pd.ExcelWriter(outputfile) as writer:
                self.final_data.to_excel(writer, index=False)
        elif OutputFormat.lower() == "csv":
            if outputfile is None:
                outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_CSV
            self.__write_csv(batch_size=batch_size, outputfile=outputfile)
        else:
            logger.debug("Incorrect file format")
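
    # Typical usage (paths are illustrative):
    #   tdg = TestDataGenerator("RawTestData.xlsx")
    #   tdg.write("csv", batch_size=100, outputfile="TestData.csv")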

    def __write_excel(self, outputfile=GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX, batch_size=0):
        """
        Writes the TestData file with the final processed data.

        :param outputfile: Name and path of the output file.
        :param batch_size: Number of rows to be randomly selected and written into the output file.
        :return: None
        """
        headers = self.final_data.columns.values.tolist()
        if batch_size > 0 and len(self.final_data) > batch_size:
            data_lis = sample(self.final_data.values.tolist(), batch_size)
        else:
            data_lis = self.final_data.values.tolist()
            if batch_size > 0:
                logger.debug("Total final data is smaller than batch size.")
        with xlsxwriter.Workbook(outputfile) as workbook:
            worksheet = workbook.add_worksheet()
            worksheet.write_row(0, 0, headers)
            for row_num, data in enumerate(data_lis):
                worksheet.write_row(row_num + 1, 0, data)

    def __write_csv(self, outputfile=GC.TESTDATAGENERATOR_OUTPUTFILE_CSV, batch_size=0):
        """
        Writes the final data into csv.

        :param outputfile: Name and path of the output file.
        :param batch_size: Number of rows to be randomly selected and written into the output file.
        :return:
        """
        headers = self.final_data.columns.values.tolist()
        if batch_size > 0 and len(self.final_data) > batch_size:
            data_lis = sample(self.final_data.values.tolist(), batch_size)
        else:
            data_lis = self.final_data.values.tolist()
        with open(outputfile, 'w', newline='\n', encoding='utf-8-sig') as file:
            fl = csv.writer(file)
            fl.writerow(headers)
            for dt in data_lis:
                fl.writerow(list(dt))

    def __generateFinalData(self, processed_data):
        """
        Performs the final pass over ``processed_data``. ``processed_data`` is a list of dictionaries; each
        dictionary is a processed row of the input file, mapping header to value (the all-combinations
        expansion has already happened in ``__process_data``).
        Every value that is a ``PrefixData`` instance (i.e. a cell that carried a prefix such as RND_, FKR_,
        RRD_ or RRE_) is resolved here via ``return_random``, so each cell ends up with a concrete value.
        Finally the list of dictionaries is converted into a pandas DataFrame, which is completely processed
        and ready to be written to the output file.

        :param processed_data:
        :return: Final data as a pandas DataFrame
        """
        for dic in processed_data:
            for key in dic:
                if type(dic[key]) == PrefixData:
                    dic[key] = dic[key].return_random()
        final_data = pd.DataFrame(processed_data)
        return final_data

    def __process_data(self, raw_json):
        """
        Processes all the raw unprocessed data read from the excel file.
        Each header is first passed through ``__splitList`` so that a list of headers
        (``[<title1>,<title2>]``) is split into individual headers.
        Each cell value is then processed by ``data_generators``: ranges are expanded into lists, string
        lists become python lists, and prefixed values become ``PrefixData`` objects.
        Finally ``product_dict`` generates all possible combinations of the row's value lists, and the
        combinations of all rows are collected into one list.
        Returns a list of dictionaries; each dictionary contains the processed data of one output row.

        :param raw_json:
        :return:
        """
        processed_datas = []
        raw_json = json.loads(raw_json.to_json(orient="records"))
        for raw_data in raw_json:
            if not list(raw_data.values())[0]:  # skip rows whose first cell is empty
                continue
            processed_data = {}
            for key in raw_data:
                keys = self.__splitList(key)
                for ke in keys:
                    data = self.data_generators(raw_data[key])
                    if type(data) != list:
                        processed_data[ke] = [data]
                    else:
                        processed_data[ke] = data
            product = list(self.product_dict(**processed_data))
            processed_datas += product
        logger.debug(f"Generated {len(processed_datas)} rows of processed data.")
        return processed_datas

    @staticmethod
    def product_dict(**kwargs):
        keys = kwargs.keys()
        vals = kwargs.values()
        for instance in itertools.product(*vals):
            yield dict(zip(keys, instance))
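
    # For example (illustrative values): product_dict(a=[1, 2], b=["x"]) yields
    # {"a": 1, "b": "x"} and {"a": 2, "b": "x"}, i.e. one dict per element of the
    # Cartesian product of the value lists.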

    def data_generators(self, raw_data_old):
        """
        First sends the data to ``__raw_data_string_process``, which strips the prefix and removes unwanted
        spaces.
        Then converts the different string representations into their respective python data types,
        i.e. a string list into a python list, a range into a list of numbers, etc.
        Simple lists and strings are converted into ``list`` type.
        Data with a prefix is wrapped into a ``PrefixData`` object, which keeps the prefix name so the value
        can be resolved later (see ``__generateFinalData``).
        Finally returns the iterable for further processing.

        :param raw_data_old:
        :return: List or PrefixData containing the necessary data
        """
        raw_data, prefix, data_type = self.__raw_data_string_process(raw_data_old)
        if len(raw_data) <= 1:
            return [""]
        if prefix == "Rnd":
            if "-" in raw_data:
                raw_data = raw_data.split('-')
                start = raw_data[0].strip()
                end = raw_data[1].strip()
                step = 1
                if "," in end:
                    raw_data = end.split(",")
                    end = raw_data[0].strip()
                    step = raw_data[1].strip()
                processed_datas = [x for x in range(int(start), int(end) + 1, int(step))]
            else:
                processed_datas = self.__splitList(raw_data)
            processed_datas = PrefixData(processed_datas, 'rnd')
        elif prefix == "Faker":
            dataList = [data.strip() for data in raw_data[1:-1].split(",")]
            processed_datas = PrefixData(dataList, prefix="fkr")
        elif prefix == "Rrd":
            sheet_name, data_looking_for, data_to_match = self.extractDataFromRrd(raw_data)
            try:
                dataList = self.__processRrdRre(sheet_name, data_looking_for, data_to_match)
                processed_datas = PrefixData(dataList, prefix='rrd', tdg_object=self)
            except KeyError:
                sys.exit(f"Please check that the source files contain all the headers mentioned in: {raw_data_old}")
        elif prefix == "Rre":
            file_name = raw_data[1:-1].split(',')[0].strip()
            sheet_name, data_looking_for, data_to_match = self.extractDataFromRrd(raw_data, index=1)
            try:
                dataList = self.__processRrdRre(sheet_name, data_looking_for, data_to_match, filename=file_name)
                processed_datas = PrefixData(dataList, prefix="rre", tdg_object=self)
            except KeyError:
                sys.exit(f"Please check that the source files contain all the headers mentioned in: {raw_data_old}")
        elif prefix == "Renv":
            processed_datas = self.get_env_variable(raw_data)
        elif raw_data[0] == "[" and raw_data[-1] == "]":
            processed_datas = self.__splitList(raw_data)
        elif "-" in raw_data:
            # try to interpret the value as a range "<start>-<end>[,<step>]"
            raw_data_original = raw_data[:]
            raw_data = raw_data.split('-')
            start = raw_data[0].strip()
            end = raw_data[1].strip()
            step = 1
            if "," in end:
                raw_data = end.split(",")
                end = raw_data[0].strip()
                step = raw_data[1].strip()
            try:
                processed_datas = [x for x in range(int(start), int(end) + 1, int(step))]
            except ValueError:
                # not a numeric range after all, treat the cell as a plain string
                processed_datas = [raw_data_original.strip()]
        else:
            processed_datas = raw_data.strip()
        return processed_datas
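
    # Illustrative conversions performed by data_generators:
    #   "1-3"        -> [1, 2, 3]
    #   "5-20,5"     -> [5, 10, 15, 20]
    #   "[x, y]"     -> ["x", "y"]
    #   "RND_1-9,2"  -> PrefixData over [1, 3, 5, 7, 9]
    #   plain "abc"  -> "abc" (wrapped into ["abc"] by __process_data)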

    def extractDataFromRrd(self, raw_data, index=0):
        # index=1 skips the leading file name of RRE strings;
        # first_value = sheet name, second_value = TargetData
        first_value = raw_data[1:-1].split(',')[0 + index].strip()
        second_value = raw_data[1:-1].split(',')[1 + index].strip()
        if second_value[0] == "[":
            # TargetData is itself a list: re-join it, then everything after its closing
            # bracket belongs to the header:value filters
            second_value = ','.join(raw_data[1:-1].split(',')[1 + index:]).strip()
            second_value = second_value[:second_value.index(']') + 1]
            third_value = [x.strip() for x in ']'.join(raw_data[1:-1].split(']')[1:]).split(',')[1:]]
        else:
            third_value = [x.strip() for x in raw_data[1:-1].split(',')[2 + index:]]
        # rebuild the "[Header:[values],...]" part and turn it into a dict of header -> value list
        evaluated_list = ']],'.join(','.join(third_value)[1:-1].strip().split('],')).split('],')
        if evaluated_list[0] == "":
            evaluated_dict = {}
        else:
            evaluated_dict = {
                splited_data.split(':')[0]: self.__splitList(splited_data.split(':')[1]) for splited_data in
                evaluated_list
            }
        if second_value[0] == "[" and second_value[-1] == "]":
            second_value = self.__splitList(second_value)
        return first_value, second_value, evaluated_dict
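
    # Example (illustrative): for raw_data "(Sheet1,Name,[City:[Berlin,Paris]])" this
    # returns ("Sheet1", "Name", {"City": ["Berlin", "Paris"]}).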

    def __processRrdRre(self, sheet_name, data_looking_for, data_to_match: dict, filename=None):
        if filename:
            filename = os.path.join(os.path.dirname(self.path), filename)
            if not self.noUpdateFiles:
                file_name = ".".join(filename.split(".")[:-1])
                file_extension = filename.split(".")[-1]
                file = file_name + "_baangt" + "." + file_extension
            else:
                file = filename
            if file not in self.rre_sheets:
                logger.debug(f"Creating clone file of: {filename}")
                if not self.noUpdateFiles:
                    filename = CloneXls(filename).update_or_make_clone()
                self.rre_sheets[filename] = {}
            filename = file
            if sheet_name in self.rre_sheets[filename]:
                df = self.rre_sheets[filename][sheet_name]
            else:
                df = pd.read_excel(filename, sheet_name, dtype=str)
                df.fillna("", inplace=True)
                self.rre_sheets[filename][sheet_name] = df
        else:
            df = self.sheet_dict[sheet_name]
            if self.path not in self.rre_sheets:
                self.rre_sheets[self.path] = {}
            if sheet_name not in self.rre_sheets[self.path]:
                self.rre_sheets[self.path][sheet_name] = df
        # keep only the rows matching all header:value filters
        df1 = df.copy()
        for key, value in data_to_match.items():
            if not isinstance(value, list):
                value = [value]
            df1 = df1.loc[df1[key].isin(value)]
        data_lis = []
        if type(data_looking_for) == str:
            data_looking_for = data_looking_for.split(",")
        # a header may be given as "old:new" to rename it in the output
        data_new_header = {}
        data_looking_for_old = data_looking_for[:]
        data_looking_for = []
        for header in data_looking_for_old:
            if ":" in header:
                old_header = header.split(":")[0].strip()
                new_header = header.split(":")[1].strip()
            else:
                old_header = header
                new_header = header
            data_new_header[old_header] = new_header
            data_looking_for.append(old_header)
        key_name = repr(sheet_name) + repr(data_looking_for) + repr(data_to_match) + repr(filename)
        if key_name in self.done:
            logger.debug("Data gathered from previously saved data.")
            return self.done[key_name]
        usecount, limit, usecount_header = self.check_usecount(df.columns.values.tolist())
        if not filename:
            if self.path not in self.isUsecount:
                self.isUsecount[self.path] = usecount_header
            if not self.isUsecount[self.path]:
                self.isUsecount[self.path] = usecount_header
        else:
            if filename not in self.isUsecount:
                self.isUsecount[filename] = usecount_header
            if not self.isUsecount[filename]:
                self.isUsecount[filename] = usecount_header
        df1_dict = df1.to_dict(orient="index")
        for index in df1_dict:
            data = df1_dict[index]
            if usecount_header:
                try:
                    used_limit = int(data[usecount_header])
                except ValueError:
                    used_limit = 0
            else:
                used_limit = 0
            if data_looking_for[0] == "*":
                # "*" means: take the whole row
                data_lis.append(data)
                self.usecount_dict[repr(data)] = {
                    "use": used_limit, "limit": limit, "index": index,
                    "sheet_name": sheet_name, "file_name": filename
                }
            else:
                dt = {data_new_header[keys]: data[keys] for keys in data_looking_for}
                data_lis.append(dt)
                self.usecount_dict[repr(dt)] = {
                    "use": used_limit, "limit": limit, "index": index,
                    "sheet_name": sheet_name, "file_name": filename
                }
        if len(data_lis) == 0:
            logger.info(f"No data matching: {data_to_match}")
            sys.exit(f"No data matching: {data_to_match}")
        logger.debug("New data gathered.")
        self.done[key_name] = data_lis
        return data_lis
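
    # Illustrative call: __processRrdRre("Sheet1", "Name:FullName", {"City": ["Berlin"]})
    # filters Sheet1 down to rows whose City is "Berlin" and returns a list like
    # [{"FullName": <row's Name>}, ...], registering each dict in usecount_dict.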

    def __raw_data_string_process(self, raw_string):
        """
        Returns ``string, prefix, data_type``, which are later used to decide which process to perform on
        the string. Their in-depth explanation is written in the functions where they are used.
        Processes the value string of every cell in the input sheet.
        First converts floats into strings, since by default xlrd reads ints as floats.
        Then checks whether the string is longer than 4 characters. If not, it simply returns the values;
        otherwise it looks at the fourth character. If that character is not "_", it returns the values;
        otherwise the string carries a prefix, so the prefix is split off the value and the data_type is set
        according to the string.
        If there is no matching prefix, the data type will be list; otherwise it is tuple.

        :param raw_string:
        :return: String of values, prefix, data_type
        """
        if type(raw_string) == float:
            raw_string = int(raw_string)
        raw_string = str(raw_string).strip()
        prefix = ""
        if len(raw_string) > 4:
            if raw_string[3] == "_":
                if raw_string[:4].lower() == "rnd_":
                    prefix = "Rnd"
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "fkr_":
                    prefix = "Faker"
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "rrd_":  # Remote Random (Remote = other sheet)
                    prefix = "Rrd"
                    raw_string = self.__process_rrd_string(raw_string)
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "rre_":  # Remote Random (Remote = other file)
                    prefix = "Rre"
                    raw_string = self.__process_rre_string(raw_string)
                    raw_string = raw_string[4:]
                    data_type = tuple
                else:
                    data_type = list
            else:
                if raw_string[:5].lower() == "renv_":
                    prefix = "Renv"
                    raw_string = raw_string[5:]
                data_type = list
        else:
            data_type = list
        return raw_string, prefix, data_type
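
    # Illustrative results: "FKR_(name, en_US, 3)" -> ("(name, en_US, 3)", "Faker", tuple);
    # "RENV_(STAGE,dev)" -> ("(STAGE,dev)", "Renv", list); "ab" -> ("ab", "", list).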

    def get_str_sheet(self, excel, sheet):
        columns = excel.parse(sheet).columns
        converters = {column: str for column in columns}
        data = excel.parse(sheet, converters=converters)
        data.fillna("", inplace=True)
        return data

    def read_excel(self, path, sheet_name="", return_json=False):
        """
        Reads the input excel file.
        Reads all the sheets inside this excel file and creates a dictionary containing the data of every
        sheet, i.e. {"sheetName": <data of sheet>}.
        Also looks for a base sheet, whose name must be given when creating the instance. If no sheet name
        is given, the first sheet of the file is used as the base sheet.
        Finally returns a dictionary containing sheetName:data of all sheets and the data of the base sheet.

        :param path: Path to the raw data xlsx file.
        :param sheet_name: Name of the base sheet where the main input data is located. Default is the first sheet.
        :return: Dictionary of all sheets and data, data of the base sheet.
        """
        wb = pd.ExcelFile(path)
        sheet_lis = wb.sheet_names
        sheet_df = {}
        for sheet in sheet_lis:
            sheet_df[sheet] = self.get_str_sheet(wb, sheet)
            sheet_df[sheet].fillna("", inplace=True)
        if return_json:
            for df in sheet_df.keys():
                sheet_df[df] = json.loads(sheet_df[df].to_json(orient="records"))
        if sheet_name == "":
            base_sheet = sheet_df[sheet_lis[0]]
        else:
            assert sheet_name in sheet_df, f"Excel file doesn't contain {sheet_name} sheet. Please recheck."
            base_sheet = sheet_df[sheet_name]
        return sheet_df, base_sheet
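
    # Sketch (hypothetical file): read_excel("raw.xlsx", "Base") returns
    # ({"Base": <DataFrame>, "Other": <DataFrame>}, <DataFrame of sheet "Base">).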

    @staticmethod
    def __splitList(raw_data):
        """
        Converts a string list into a python list,
        i.e. "[value1,value2,value3]" ==> ["value1", "value2", "value3"]

        :param raw_data: string of list
        :return: Python list
        """
        if raw_data[0] == "[" and raw_data[-1] == "]":
            data = raw_data[1:-1]
        else:
            data = raw_data
        processed_datas = [data.strip() for data in data.split(",")]
        return processed_datas
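
    # For example: __splitList("[a, b, c]") -> ["a", "b", "c"]; the surrounding
    # brackets are optional, so "a, b, c" gives the same result.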

    def check_usecount(self, data):
        # finds and returns whether the input headers contain a usecount header, and its limit if any
        usecount = False
        limit = 0
        usecount_header = None
        for header in data:
            if "usecount" in header.lower():
                usecount = True
                usecount_header = header
                if "usecount_" in header.lower():
                    try:
                        limit = int(header.lower().strip().split("count_")[1])
                    except ValueError:
                        limit = 0
        return usecount, limit, usecount_header
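
    # For example, a header "UseCount_3" yields (True, 3, "UseCount_3"); a plain
    # "UseCount" header yields (True, 0, "UseCount"); no matching header gives
    # (False, 0, None).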

    def save_usecount(self):
        if self.noUpdateFiles:
            return
        for filename in self.isUsecount:
            logger.debug(f"Updating file {filename} with usecounts.")
            sheet_dict = self.rre_sheets[filename]
            ex = pd.ExcelFile(filename)
            for sheet in ex.sheet_names:
                if sheet in sheet_dict:
                    continue
                df = self.get_str_sheet(ex, sheet)
                sheet_dict[sheet] = df
            with pd.ExcelWriter(filename) as writer:
                for sheetname in sheet_dict:
                    sheet_dict[sheetname].to_excel(writer, sheet_name=sheetname, index=False)
            logger.debug(f"File updated {filename}.")

    def update_usecount_in_source(self, data):
        if self.noUpdateFiles:
            return
        filename = self.usecount_dict[repr(data)]["file_name"]
        if not filename:
            filename = self.path
        if filename not in self.isUsecount:
            return
        if not self.isUsecount[filename]:
            return
        self.rre_sheets[filename][self.usecount_dict[repr(data)]["sheet_name"]][
            self.isUsecount[filename]][self.usecount_dict[repr(data)]["index"]] = self.usecount_dict[repr(data)]["use"]

    def __process_rrd_string(self, rrd_string):
        """
        Validates rrd_strings provided by the user.
        If there is any error in the string, this function raises immediately and stops further execution.
        It also removes the empty spaces around the commas in the string.
        Formats supported by the regex in this method are:
        ``RRD_[sheetName,TargetData,[Header:[values**],Header:[values**]]]``
        ``RRD_[sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
        ``RRD_(sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
        ``RRD_[sheetName,*,[Header:[values**],Header:[values**]]]``
        ``RRD_[sheetName,TargetData,[]]``
        ``RRD_(sheetName,TargetData,[])``
        ``RRD_(sheetName,*,[])``
        ``RRD_[sheetName,*,[]]``

        :param rrd_string:
        :return:
        """
        processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
        match = re.match(
            r"(RRD_(\(|\[))[a-zA-Z0-9\s]+,(\[?[a-zA-Z\s,]+\]?|)|\*,\[([a-zA-Z0-9\s]+:\[[a-zA-Z0-9,\s]+\](,?))*\]",
            processed_string
        )
        err_string = f"{rrd_string} not matching pattern RRD_(sheetName,TargetData," \
                     f"[Header1:[Value1],Header2:[Value1,Value2]])"
        assert match, err_string
        return processed_string
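
    # For example, "RRD_[Sheet1, Name, []]" is normalized to "RRD_[Sheet1,Name,[]]"
    # (spaces after commas removed) before the pattern check.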

    def __process_rre_string(self, rrd_string):
        """
        Validates rre_strings provided by the user.
        If there is any error in the string, this function raises immediately and stops further execution.
        It also removes the empty spaces around the commas in the string.
        Formats supported by the regex in this method are:
        ``RRE_[fileName,sheetName,TargetData,[Header:[values**],Header:[values**]]]``
        ``RRE_[fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
        ``RRE_(fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
        ``RRE_[fileName,sheetName,*,[Header:[values**],Header:[values**]]]``
        ``RRE_[fileName,sheetName,TargetData,[]]``
        ``RRE_(fileName,sheetName,TargetData,[])``
        ``RRE_(fileName,sheetName,*,[])``
        ``RRE_[fileName,sheetName,*,[]]``

        :param rrd_string:
        :return:
        """
        processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
        match = re.match(
            r"(RRE_(\(|\[))[\w\d\s\-./\\]+\.(xlsx|xls),[a-zA-Z0-9\s]+,(\[?[a-zA-Z\s,]+\]?|)|\*,\[([a-zA-Z0-9\s]+:\[[a-zA-Z0-9,\s]+\](,?))*\]",
            processed_string
        )
        err_string = f"{rrd_string} not matching pattern RRE_(fileName, sheetName, TargetData," \
                     f"[Header1:[Value1],Header2:[Value1,Value2]])"
        assert match, err_string
        return processed_string

    @staticmethod
    def get_env_variable(string):
        variable = string[1:-1].strip().split(',')[0].strip()
        data = os.environ.get(variable)
        try:
            if not data:
                data = string[1:-1].strip().split(',')[1].strip()
                logger.info(f"{variable} not found in environment, using {data} instead")
        except IndexError:
            raise BaseException(f"Can't find {variable} in environment & default value is also not set")
        return data
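
    # For a cell "RENV_(STAGE,dev)" (hypothetical variable name) this receives
    # "(STAGE,dev)" and returns os.environ["STAGE"] if set, otherwise "dev".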


if __name__ == "__main__":
    lTestDataGenerator = TestDataGenerator("../../tests/0TestInput/RawTestData.xlsx")
    lTestDataGenerator.write()