TestDataGenerator.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  1. import csv
  2. import itertools
  3. import xlsxwriter
  4. import errno
  5. import os
  6. import logging
  7. import faker
  8. from random import sample, choice
  9. import baangt.base.GlobalConstants as GC
  10. import re
  11. import sys
  12. import pandas as pd
  13. from CloneXls import CloneXls
  14. import json
  15. logger = logging.getLogger("pyC")
  class PrefixData:
      """
      Container for the candidate values of one prefixed cell (RND/FKR/RRD/RRE).

      The candidates are prepared once in ``process`` and a single value is drawn
      from them on every call of ``return_random``.

      :param dataList: Raw candidates. For ``fkr`` this is
          ``[<faker method name>, <locale>, <optional count>]``; for ``rrd``/``rre``
          a list of row dictionaries; for ``rnd`` a plain list of values.
      :param prefix: One of ``rnd``, ``fkr``, ``rrd``, ``rre`` (case-insensitive).
      :param tdg_object: Object exposing ``usecount_dict``; required for
          ``rrd``/``rre`` because use counts and limits are looked up there by
          ``repr(row)``.
      """

      def __init__(self, dataList, prefix, tdg_object=None):
          self.dataList = dataList
          self.prefix = prefix
          self.tdg_object = tdg_object
          self.process()

      def process(self):
          """Prepare ``self.dataList`` according to the prefix."""
          kind = self.prefix.lower()
          if kind in ("rrd", "rre"):
              usecounts = self.tdg_object.usecount_dict
              # Keep rows without a limit (limit is 0/None) and rows that are still
              # below their limit. A new list is built so that later removals do
              # not touch the list that was passed in.
              self.dataList = [
                  data for data in self.dataList
                  if not usecounts[repr(data)]["limit"]
                  or usecounts[repr(data)]["use"] < usecounts[repr(data)]["limit"]
              ]
          elif kind == "fkr":
              method_name = self.dataList[0]
              fake = faker.Faker(self.dataList[1])
              has_count = len(self.dataList) == 3
              count = int(self.dataList[2]) if has_count else 5
              if has_count and count == 0:
                  # A count of 0 means "generate a fresh value on every draw":
                  # keep the generator itself, wrapped in a tuple as a marker.
                  self.dataList = ([fake, method_name],)
              else:
                  self.dataList = [getattr(fake, method_name)() for _ in range(count)]

      def return_random(self):
          """
          Return one randomly chosen value.

          For ``rrd``/``rre`` the use count of the chosen row is incremented and the
          row is dropped from the pool once its limit is reached.

          :return: The chosen value, or ``None`` for an unknown prefix.
          :raises BaseException: When an ``rrd``/``rre`` pool is empty.
          """
          kind = self.prefix.lower()
          if kind in ("rre", "rrd"):
              if not self.dataList:
                  # BaseException is kept deliberately so the error is not swallowed
                  # by generic ``except Exception`` handlers further up.
                  raise BaseException("Not enough data, please verify if data is present or usecount limit "
                                      "has reached!!")
              data = choice(self.dataList)
              record = self.tdg_object.usecount_dict[repr(data)]
              record['use'] += 1
              if record['limit'] and record['use'] >= record['limit']:
                  self.dataList.remove(data)
              return data
          if kind == "fkr":
              if isinstance(self.dataList, tuple):
                  fake, method_name = self.dataList[0]
                  return getattr(fake, method_name)()
              return choice(self.dataList)
          if kind == "rnd":
              return choice(self.dataList)
          return None
  class TestDataGenerator:
      """
      Create a test-data file from a raw Excel file that lists all possible values.

      Formats accepted in the input Excel file:

      1. Value = ``<value>``
      2. list of values = ``[<value1>,<value2>]``
      3. range = ``<start>-<end>,<step>``
      4. random = ``RND_[list]``
      5. random from range = ``RND_<start>-<end>,<step>``
      6. List of header = ``[<title1>, <title2>, <title3>]``
      7. Faker Prefix = ``FKR_(<type>, <locale>, <number_of_data>)``
      8. RRD Prefix = ``RRD_(<sheetName>,<TargetData>,[<Header1>:[<Value1>],<Header2>:[<Value1>,<Value2>]])``
      9. RRE Prefix = ``RRE_(<fileName>,<sheetName>,<TargetData>,[<Header1>:[<Value1>]])``
      10. RENV Prefix = ``RENV_(<variable>,<default>)``

      :param rawExcelPath: Path of the xlsx file containing the input data.
      :param sheetName: Name of the sheet holding the base data; an empty string
          selects the first sheet.
      :param from_handleDatabase: When true the sheets are only read; no data is
          generated in the constructor.
      :param noUpdate: When true (default) source files are neither cloned nor
          rewritten with use counts.
      :method write: Writes the final processed data to an excel/csv file.
      """

      def __init__(self, rawExcelPath=GC.TESTDATAGENERATOR_INPUTFILE, sheetName="",
                   from_handleDatabase=False, noUpdate=True):
          self.path = os.path.abspath(rawExcelPath)
          self.sheet_name = sheetName
          if not os.path.isfile(self.path):
              raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.path)
          self.sheet_dict, self.raw_data_json = self.read_excel(self.path, self.sheet_name)
          self.rre_sheets = {}  # {file path: {sheet name: DataFrame}} of every sheet touched
          self.isUsecount = {}  # {file path: usecount header or None}
          self.usecount_dict = {}  # {repr(row): use/limit/index/sheet/file} to enforce usecount limits
          self.done = {}  # cache of already resolved RRD/RRE lookups
          self.noUpdateFiles = noUpdate
          self.writers = {}
          if not from_handleDatabase:
              self.processed_datas = self.__process_data(self.raw_data_json)
              self.final_data = self.__generateFinalData(self.processed_datas)
              if self.isUsecount and not self.noUpdateFiles:
                  self.save_usecount()  # saving source input file once everything is done

      def write(self, OutputFormat=GC.TESTDATAGENERATOR_OUTPUT_FORMAT, batch_size=0, outputfile=None):
          """
          Write the generated data to the output file.

          :param OutputFormat: "xlsx" or "csv" (case-insensitive); anything else is
              only logged.
          :param batch_size: Number of rows to write, randomly sampled. ``0`` (or a
              value not smaller than the number of rows) writes everything.
          :param outputfile: Name and path of the output file; defaults come from
              ``GlobalConstants``.
          :return: None
          """
          data_lis = self.final_data
          if batch_size > 0:
              if len(self.final_data) > batch_size:
                  data_lis = self.final_data.sample(n=batch_size)
              else:
                  logger.debug("Total final data is smaller than batch size.")
          output_format = OutputFormat.lower()
          if output_format == "xlsx":
              if outputfile is None:
                  outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX
              # The context manager saves and closes the workbook on exit; an
              # explicit ``writer.save()`` is not needed (and no longer exists in
              # pandas 2.x).
              with pd.ExcelWriter(outputfile) as writer:
                  data_lis.to_excel(writer, index=False)
          elif output_format == "csv":
              if outputfile is None:
                  outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_CSV
              data_lis.to_csv(outputfile)
          else:
              logger.debug("Incorrect file format")

      def __generateFinalData(self, processed_data):
          """
          Replace every ``PrefixData`` placeholder by a randomly drawn value.

          When the drawn value is a dictionary (RRD/RRE row) the placeholder's
          column is removed and the row's columns are merged into the record.

          :param processed_data: List of record dictionaries (modified in place).
          :return: ``pandas.DataFrame`` built from the resolved records.
          """
          for dic in processed_data:
              # Iterate over a snapshot of the keys: the record is modified below.
              for key in list(dic):
                  if isinstance(dic[key], PrefixData):
                      data = dic[key].return_random()
                      if isinstance(data, dict):
                          del dic[key]
                          dic.update(data)
                      else:
                          dic[key] = data
          return pd.DataFrame(processed_data)

      def __process_data(self, raw_json):
          """
          Convert the raw base sheet into a list of record dictionaries.

          Every header is split with ``__splitList`` so that a list header such as
          ``[a, b]`` yields one column per title. Every cell is converted by
          ``data_generators``; a non-list result (plain string or ``PrefixData``)
          is wrapped in a one-element list. The cartesian product of all column
          value lists of a row gives the records for that row. Rows whose first
          column is empty are skipped.

          :param raw_json: DataFrame of the base sheet.
          :return: List of dictionaries, one per generated record.
          """
          processed_datas = []
          raw_json = json.loads(raw_json.to_json(orient="records"))
          for raw_data in raw_json:
              # Skip rows without columns or with an empty first column.
              if not next(iter(raw_data.values()), None):
                  continue
              processed_data = {}
              for key in raw_data:
                  for ke in self.__splitList(key):
                      # Generated per header on purpose: each header gets its own
                      # PrefixData instance.
                      data = self.data_generators(raw_data[key])
                      processed_data[ke] = data if isinstance(data, list) else [data]
              processed_datas.extend(self.product_dict(**processed_data))
          return processed_datas

      @staticmethod
      def _expand_range(text):
          """
          Expand ``<start>-<end>[,<step>]`` into the inclusive list of integers.

          :param text: Range string; only the first two ``-`` separated parts are used.
          :return: List of integers from start to end (inclusive).
          :raises ValueError: When a part is not an integer or the step is 0.
          """
          parts = text.split('-')
          start = parts[0].strip()
          end = parts[1].strip()
          step = 1
          if "," in end:
              end_parts = end.split(",")
              end = end_parts[0].strip()
              step = end_parts[1].strip()
          return list(range(int(start), int(end) + 1, int(step)))

      def data_generators(self, raw_data_old):
          """
          Convert the raw value of one cell into python data.

          The value is first normalised by ``__raw_data_string_process`` which also
          detects the prefix. Depending on prefix and shape the result is:

          * ``RND_``/``FKR_``/``RRD_``/``RRE_``: a ``PrefixData`` object,
          * ``RENV_``: the string returned by ``get_env_variable``,
          * ``[a,b]``: a list of strings,
          * ``<start>-<end>,<step>``: a list of integers, or a one-element list
            with the original text when it is not a valid range,
          * anything else: the stripped string.

          :param raw_data_old: Raw cell value.
          :return: list, string or ``PrefixData``
          """
          raw_data, prefix, _ = self.__raw_data_string_process(raw_data_old)
          # Empty cells give an empty value. Prefixed values with at most one
          # character of payload are treated as empty as well (unchanged legacy
          # behaviour); plain one-character values such as "5" are kept.
          if not raw_data or (prefix and len(raw_data) <= 1):
              return [""]
          if prefix == "Rnd":
              candidates = None
              if "-" in raw_data:
                  try:
                      candidates = self._expand_range(raw_data)
                  except ValueError:
                      # Not a numeric range, e.g. a list with hyphenated values.
                      candidates = None
              if candidates is None:
                  candidates = self.__splitList(raw_data)
              processed_datas = PrefixData(candidates, 'rnd')
          elif prefix == "Faker":
              dataList = [data.strip() for data in raw_data[1:-1].split(",")]
              processed_datas = PrefixData(dataList, prefix="fkr")
          elif prefix == "Rrd":
              sheet_name, data_looking_for, data_to_match = self.extractDataFromRrd(raw_data)
              try:
                  dataList = self.__processRrdRre(sheet_name, data_looking_for, data_to_match)
                  processed_datas = PrefixData(dataList, prefix='rrd', tdg_object=self)
              except KeyError:
                  sys.exit(f"Please check that source files contains all the headers mentioned in : {raw_data_old}")
          elif prefix == "Rre":
              file_name = raw_data[1:-1].split(',')[0].strip()
              sheet_name, data_looking_for, data_to_match = self.extractDataFromRrd(raw_data, index=1)
              try:
                  dataList = self.__processRrdRre(sheet_name, data_looking_for, data_to_match, filename=file_name)
                  processed_datas = PrefixData(dataList, prefix="rre", tdg_object=self)
              except KeyError:
                  sys.exit(f"Please check that source files contains all the headers mentioned in : {raw_data_old}")
          elif prefix == "Renv":
              processed_datas = self.get_env_variable(raw_data)
          elif raw_data[0] == "[" and raw_data[-1] == "]":
              processed_datas = self.__splitList(raw_data)
          elif "-" in raw_data:
              try:
                  processed_datas = self._expand_range(raw_data)
              except ValueError:
                  # Text that merely contains a hyphen is kept as a single value.
                  processed_datas = [raw_data.strip()]
          else:
              processed_datas = raw_data.strip()
          return processed_datas

      def extractDataFromRrd(self, raw_data, index=0):
          """
          Split the payload of an RRD/RRE string into its three parts.

          :param raw_data: Payload including the surrounding brackets, e.g.
              ``(sheet,target,[Header:[v1,v2]])``.
          :param index: Offset of the sheet name inside the comma separated parts
              (``1`` for RRE where the file name comes first).
          :return: ``(sheet name, target header(s), {match header: [values]})``;
              the target is a list when it was written as ``[a,b]``, otherwise a
              string.
          """
          inner = raw_data[1:-1]
          parts = inner.split(',')
          first_value = parts[0 + index].strip()
          second_value = parts[1 + index].strip()
          if second_value.startswith("["):
              # Target is a list: it ends at the first closing bracket, everything
              # after that bracket (and the following comma) is the match part.
              second_value = ','.join(parts[1 + index:]).strip()
              second_value = second_value[:second_value.index(']') + 1]
              third_value = [x.strip() for x in ']'.join(inner.split(']')[1:]).split(',')[1:]]
          else:
              third_value = [x.strip() for x in parts[2 + index:]]
          # Drop the outer brackets and split "H1:[a],H2:[b,c]" into one entry per
          # header while keeping each entry's closing bracket.
          evaluated_list = ']],'.join(','.join(third_value)[1:-1].strip().split('],')).split('],')
          if evaluated_list[0] == "":
              evaluated_dict = {}
          else:
              evaluated_dict = {
                  splited_data.split(':')[0]: self.__splitList(splited_data.split(':')[1])
                  for splited_data in evaluated_list
              }
          if second_value.startswith("[") and second_value.endswith("]"):
              second_value = self.__splitList(second_value)
          return first_value, second_value, evaluated_dict

      def __processRrdRre(self, sheet_name, data_looking_for, data_to_match: dict, filename=None):
          """
          Collect the rows of a (remote) sheet that match the given filter.

          :param sheet_name: Sheet to read the rows from.
          :param data_looking_for: Header(s) to return, ``*`` for all columns. A
              header written as ``old:new`` is read from column ``old`` and
              returned under the name ``new``.
          :param data_to_match: ``{header: [allowed values]}``; a row must match
              every entry.
          :param filename: Excel file relative to the input file (RRE); ``None``
              reads from the input file itself (RRD).
          :return: List of row dictionaries. Exits the process when nothing matches.
          :raises KeyError: When a requested or filter header is not in the sheet.
          """
          if filename:
              filename = os.path.join(os.path.dirname(self.path), filename)
              if not self.noUpdateFiles:
                  # Updates go to a "<name>_baangt.<ext>" clone, never to the original.
                  file_name = ".".join(filename.split(".")[:-1])
                  file_extension = filename.split(".")[-1]
                  file = file_name + "_baangt" + "." + file_extension
              else:
                  file = filename
              if file not in self.rre_sheets:
                  logger.debug(f"Creating clone file of: {filename}")
                  if not self.noUpdateFiles:
                      filename = CloneXls(filename).update_or_make_clone()
                  self.rre_sheets[filename] = {}
              filename = file
              if sheet_name in self.rre_sheets[filename]:
                  df = self.rre_sheets[filename][sheet_name]
              else:
                  df = pd.read_excel(filename, sheet_name=sheet_name, dtype=str)
                  df.fillna("", inplace=True)
                  self.rre_sheets[filename][sheet_name] = df
          else:
              df = self.sheet_dict[sheet_name]
              self.rre_sheets.setdefault(self.path, {}).setdefault(sheet_name, df)

          # Narrow the rows down filter by filter.
          df1 = df.copy()
          for key, value in data_to_match.items():
              if not isinstance(value, list):
                  value = [value]
              df1 = df1.loc[df1[key].isin(value)]

          if isinstance(data_looking_for, str):
              data_looking_for = data_looking_for.split(",")
          requested_headers = list(data_looking_for)
          # (source column, output header) pairs; "old:new" renames the column.
          header_pairs = []
          for header in requested_headers:
              if ":" in header:
                  old_header = header.split(":")[0].strip()
                  new_header = header.split(":")[1].strip()
              else:
                  old_header = header
                  new_header = header
              header_pairs.append((old_header, new_header))

          key_name = repr(sheet_name) + repr(requested_headers) + repr(data_to_match) + repr(filename)
          if key_name in self.done:
              logger.debug(f"Data Gathered from previously saved data.")
              return self.done[key_name]

          usecount, limit, usecount_header = self.check_usecount(df.columns.values.tolist())
          # Remember the usecount header per file; an already known header is kept.
          source_file = filename if filename else self.path
          if not self.isUsecount.get(source_file):
              self.isUsecount[source_file] = usecount_header

          data_lis = []
          want_all_columns = requested_headers[0] == "*"
          for index, data in df1.to_dict(orient="index").items():
              used_limit = 0
              if usecount_header:
                  try:
                      used_limit = int(data[usecount_header])
                  except (ValueError, TypeError):
                      # Empty or non-numeric use count is treated as "never used".
                      used_limit = 0
              if want_all_columns:
                  if usecount_header:
                      del data[usecount_header]
                  row = data
              else:
                  row = {new_header: data[old_header] for old_header, new_header in header_pairs}
              data_lis.append(row)
              self.usecount_dict[repr(row)] = {
                  "use": used_limit, "limit": limit, "index": index,
                  "sheet_name": sheet_name, "file_name": filename
              }
          if not data_lis:
              logger.info(f"No data matching: {data_to_match}")
              sys.exit(f"No data matching: {data_to_match}")
          logger.debug(f"New Data Gathered.")
          self.done[key_name] = data_lis
          return data_lis

      def __raw_data_string_process(self, raw_string):
          """
          Normalise a cell value and detect its prefix.

          Floats with an integral value are converted to ``int`` first so that
          ``5.0`` becomes ``"5"``; other floats keep their decimals. The value is
          then converted to a stripped string. Strings longer than 4 characters
          are checked (case-insensitively) for the prefixes ``rnd_``, ``fkr_``,
          ``rrd_``, ``rre_`` and ``renv_``; a found prefix is removed from the
          returned string. RRD/RRE strings are validated before the prefix is
          removed.

          :param raw_string: Raw cell value.
          :return: ``(string of values, prefix, data_type)`` where prefix is one of
              ``""``, ``"Rnd"``, ``"Faker"``, ``"Rrd"``, ``"Rre"``, ``"Renv"`` and
              data_type is ``tuple`` for the first four prefixes, else ``list``.
          """
          if isinstance(raw_string, float) and raw_string.is_integer():
              raw_string = int(raw_string)
          raw_string = str(raw_string).strip()
          prefix = ""
          data_type = list
          if len(raw_string) > 4:
              head = raw_string[:4].lower()
              if head == "rnd_":
                  prefix = "Rnd"
                  raw_string = raw_string[4:]
                  data_type = tuple
              elif head == "fkr_":
                  prefix = "Faker"
                  raw_string = raw_string[4:]
                  data_type = tuple
              elif head == "rrd_":  # Remote Random (Remote = other sheet)
                  prefix = "Rrd"
                  raw_string = self.__process_rrd_string(raw_string)[4:]
                  data_type = tuple
              elif head == "rre_":  # Remote Random (Remote = other file)
                  prefix = "Rre"
                  raw_string = self.__process_rre_string(raw_string)[4:]
                  data_type = tuple
              elif raw_string[:5].lower() == "renv_":
                  prefix = "Renv"
                  raw_string = raw_string[5:]
          return raw_string, prefix, data_type

      def get_str_sheet(self, excel, sheet):
          """
          Read one sheet with every column converted to ``str``.

          :param excel: Open ``pandas.ExcelFile``.
          :param sheet: Name of the sheet to read.
          :return: DataFrame with string cells; missing values are replaced by "".
          """
          # First pass only discovers the column names for the converters.
          columns = excel.parse(sheet).columns
          converters = {column: str for column in columns}
          data = excel.parse(sheet, converters=converters)
          data.fillna("", inplace=True)
          return data

      def read_excel(self, path, sheet_name="", return_json=False):
          """
          Read all sheets of the input excel file.

          :param path: Path to the raw data xlsx file.
          :param sheet_name: Name of the base sheet where the main input data is
              located. Default is the first sheet.
          :param return_json: When true every sheet is returned as a list of
              record dictionaries instead of a DataFrame.
          :return: ``({sheet name: data}, data of the base sheet)``
          :raises AssertionError: When ``sheet_name`` is not in the file.
          """
          # The context manager closes the file handle once all sheets are parsed.
          with pd.ExcelFile(path) as wb:
              sheet_lis = wb.sheet_names
              sheet_df = {sheet: self.get_str_sheet(wb, sheet) for sheet in sheet_lis}
          if return_json:
              for name in sheet_df:
                  sheet_df[name] = json.loads(sheet_df[name].to_json(orient="records"))
          if sheet_name == "":
              base_sheet = sheet_df[sheet_lis[0]]
          else:
              if sheet_name not in sheet_df:
                  # Raised explicitly (instead of ``assert``) so the check also
                  # runs under ``python -O``; the exception type is unchanged.
                  raise AssertionError(f"Excel file doesn't contain {sheet_name} sheet. Please recheck.")
              base_sheet = sheet_df[sheet_name]
          return sheet_df, base_sheet

      @staticmethod
      def __splitList(raw_data):
          """
          Convert a string list to a python list.

          i.e. "[value1,value2,value3]" ==> ["value1","value2","value3"]

          A string without surrounding brackets is split on commas as it is.

          :param raw_data: string of list
          :return: Python list of stripped strings
          """
          if raw_data.startswith("[") and raw_data.endswith("]"):
              data = raw_data[1:-1]
          else:
              data = raw_data
          return [item.strip() for item in data.split(",")]

      def check_usecount(self, data):
          """
          Look for a usecount header in a list of headers.

          A header containing ``usecount`` (case-insensitive) marks the column; a
          header of the form ``usecount_<n>`` additionally sets the limit to
          ``n``. When several headers match, the last one wins for the header
          name.

          :param data: List of header names.
          :return: ``(usecount found, limit, usecount header or None)``
          """
          usecount = False
          limit = 0
          usecount_header = None
          for header in data:
              lowered = header.lower()
              if "usecount" not in lowered:
                  continue
              usecount = True
              usecount_header = header
              if "usecount_" in lowered:
                  try:
                      limit = int(lowered.strip().split("count_")[1])
                  except ValueError:
                      limit = 0
          return usecount, limit, usecount_header

      def save_usecount(self):
          """
          Write all touched sheets back to their files so use counts persist.

          Does nothing when ``noUpdateFiles`` is set. Sheets of a file that were
          never loaded are read first so that rewriting the file does not drop
          them.
          """
          if self.noUpdateFiles:
              return
          for filename in self.isUsecount:
              logger.debug(f"Updating file {filename} with usecounts.")
              sheet_dict = self.rre_sheets[filename]
              # Close the reader before the same path is opened for writing.
              with pd.ExcelFile(filename) as ex:
                  for sheet in ex.sheet_names:
                      if sheet not in sheet_dict:
                          sheet_dict[sheet] = self.get_str_sheet(ex, sheet)
              # Leaving the ``with`` block saves the workbook.
              with pd.ExcelWriter(filename) as writer:
                  for sheetname in sheet_dict:
                      sheet_dict[sheetname].to_excel(writer, sheet_name=sheetname, index=False)
              logger.debug(f"File updated {filename}.")

      def update_usecount_in_source(self, data):
          """
          Copy the current use count of ``data`` into the cached source sheet.

          Does nothing when ``noUpdateFiles`` is set or the source file has no
          usecount column.

          :param data: Row dictionary as stored in ``usecount_dict`` (by ``repr``).
          """
          if self.noUpdateFiles:
              return
          record = self.usecount_dict[repr(data)]
          filename = record["file_name"] or self.path
          usecount_header = self.isUsecount.get(filename)
          if not usecount_header:
              return
          sheet = self.rre_sheets[filename][record["sheet_name"]]
          # ``.loc`` writes into the cached DataFrame itself; chained indexing
          # (``df[col][idx] = value``) may write to a temporary copy instead.
          sheet.loc[record["index"], usecount_header] = record["use"]

      def __process_rrd_string(self, rrd_string):
          """
          Validate an RRD string and remove the spaces after its commas.

          Formats the pattern is meant to support:
          ``RRD_[sheetName,TargetData,[Header:[values**],Header:[values**]]]``
          ``RRD_[sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
          ``RRD_(sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
          ``RRD_[sheetName,*,[Header:[values**],Header:[values**]]]``
          ``RRD_[sheetName,TargetData,[]]``
          ``RRD_(sheetName,TargetData,[])``
          ``RRD_(sheetName,*,[])``
          ``RRD_[sheetName,*,[]]``

          NOTE(review): ``re.match`` only anchors at the start, so effectively just
          the leading ``RRD_(sheetName,`` part is enforced — confirm whether a
          stricter check is wanted.

          :param rrd_string: Complete cell value including the ``RRD_`` prefix.
          :return: The string with ``", "`` separators collapsed to ``","``.
          :raises AssertionError: When the string does not match the pattern.
          """
          processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
          # Case-insensitive because the prefix itself is detected case-insensitively.
          match = re.match(r"(RRD_(\(|\[))[a-zA-z0-9\s]+,(\[?[a-zA-z\s,]+\]?|)|\*,\[([a-zA-z0-9\s]+:\[[a-zA-z0-9,\s]+\](,?))*\]",
                           processed_string, flags=re.IGNORECASE)
          if not match:
              err_string = f"{rrd_string} not matching pattern RRD_(sheetName,TargetData," \
                           f"[Header1:[Value1],Header2:[Value1,Value2]])"
              raise AssertionError(err_string)
          return processed_string

      def __process_rre_string(self, rrd_string):
          """
          Validate an RRE string and remove the spaces after its commas.

          Formats the pattern is meant to support:
          ``RRE_[fileName,sheetName,TargetData,[Header:[values**],Header:[values**]]]``
          ``RRE_[fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
          ``RRE_(fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
          ``RRE_[fileName,sheetName,*,[Header:[values**],Header:[values**]]]``
          ``RRE_[fileName,sheetName,TargetData,[]]``
          ``RRE_(fileName,sheetName,TargetData,[])``
          ``RRE_(fileName,sheetName,*,[])``
          ``RRE_[fileName,sheetName,*,[]]``

          The file name must end in ``.xlsx`` or ``.xls``.

          :param rrd_string: Complete cell value including the ``RRE_`` prefix.
          :return: The string with ``", "`` separators collapsed to ``","``.
          :raises AssertionError: When the string does not match the pattern.
          """
          processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
          # Case-insensitive because the prefix itself is detected case-insensitively.
          match = re.match(r"(RRE_(\(|\[))[\w\d\s\-./\\]+\.(xlsx|xls),[a-zA-z0-9\s]+,(\[?[a-zA-z\s,]+\]?|)|\*,\[([a-zA-z0-9\s]+:\[[a-zA-z0-9,\s]+\](,?))*\]",
                           processed_string, flags=re.IGNORECASE)
          if not match:
              err_string = f"{rrd_string} not matching pattern RRE_(fileName, sheetName, TargetData," \
                           f"[Header1:[Value1],Header2:[Value1,Value2]])"
              raise AssertionError(err_string)
          return processed_string

      @staticmethod
      def get_env_variable(string):
          """
          Resolve ``(<variable>,<default>)`` from the environment.

          :param string: Payload of a RENV cell including the surrounding brackets.
          :return: Value of the environment variable, or the default when the
              variable is unset or empty.
          :raises BaseException: When the variable is unset/empty and no default
              is given.
          """
          parts = string[1:-1].strip().split(',')
          variable = parts[0].strip()
          data = os.environ.get(variable)
          if not data:
              if len(parts) < 2:
                  # BaseException is kept so existing callers see the same type.
                  raise BaseException(f"Can't find {variable} in envrionment & default value is also not set")
              data = parts[1].strip()
              logger.info(f"{variable} not found in environment, using {data} instead")
          return data

      @staticmethod
      def product_dict(**kwargs):
          """
          Yield one dictionary per combination of the given value lists.

          i.e. ``product_dict(a=[1, 2], b=[3])`` ==> ``{"a": 1, "b": 3}``, ``{"a": 2, "b": 3}``

          :param kwargs: ``header=iterable of values`` pairs.
          :return: Generator of dictionaries.
          """
          keys = kwargs.keys()
          for instance in itertools.product(*kwargs.values()):
              yield dict(zip(keys, instance))
  if __name__ == "__main__":
      # Manual run: generate data from the sample input file and write it with
      # the default output format and location.
      generator = TestDataGenerator("../../tests/0TestInput/RawTestData.xlsx")
      generator.write()