Akash Singh 3 years ago
parent
commit
9baf5e1e26
3 changed files with 98 additions and 0 deletions
  1. 95 0
      data_mover.py
  2. 2 0
      requirements.txt
  3. 1 0
      xlsx_write.py

+ 95 - 0
data_mover.py

@@ -0,0 +1,95 @@
+import xlrd3 as xlrd
+from openpyxl import load_workbook
+
+
+class Writer:
+    """
+    Helper for editing an Excel workbook that already exists on disk.
+
+    The workbook is loaded once when the object is created. Any number of cells can then be changed in memory
+    with write(), and nothing is written back to the file until save() is called, so many updates cost a
+    single load and a single save.
+    """
+    def __init__(self, path, sheet):
+        # path  : location of the workbook; it is loaded from here and save() writes back to the same place
+        # sheet : name of the worksheet inside the workbook that write() will modify
+        self.path, self.sht = path, sheet
+        self.workbook = load_workbook(path)
+        self.sheet = self.workbook[sheet]
+
+    def write(self, row, col, data):
+        # Store data in the cell addressed by (row, col).
+        # Note :- openpyxl is used here, so row & column numbers start from 1 instead of 0
+        target = self.sheet.cell(row, col)
+        target.value = data
+
+    def save(self):
+        # Persist all pending write() calls by saving the workbook over the file it was loaded from
+        self.workbook.save(self.path)
+
+
+class Mover:
+    """
+    Copy rows from a sheet of one excel file to a sheet of another, matching columns by header name.
+
+    Row 0 of both sheets is treated as the header row. Copied rows are appended below the rows already present
+    in the destination sheet, and can optionally be restricted to the rows matching a set of column filters.
+    """
+    def __init__(self, source_file_path, source_sheet, destination_file_path, destination_sheet, lines=0):
+        """
+
+        :param source_file_path: Path with filename of source excel file
+        :param source_sheet: Sheet name of the source
+        :param destination_file_path: Path with filename of Destination excel file
+        :param destination_sheet: Sheet name of the destination
+        :param lines: Number of leading rows of the source sheet (header row included) to be considered for
+                      moving. 0 (the default) means every row of the source sheet.
+        """
+        self.source_file_path = source_file_path
+        self.source_sheet = source_sheet
+        self.destination_file_path = destination_file_path
+        self.destination_sheet = destination_sheet
+        self.lines = lines
+
+    def move(self, filters=None, add_missing_columns=False):
+        """
+        Append the selected source rows to the destination sheet and save the destination file.
+
+        :param filters: Optional dict mapping a source header name to the value a row must hold in that column.
+                        A row is copied only when it satisfies every entry. None or an empty dict copies all rows.
+        :param add_missing_columns: When True, source headers missing from the destination are first appended to
+                                    the destination header row so those columns are copied too. When False,
+                                    source columns without a matching destination header are skipped.
+        :raises ValueError: if a key of filters is not a header of the source sheet.
+        """
+        source = self.read_xlsx(self.source_file_path, self.source_sheet)
+        destination = self.read_xlsx(self.destination_file_path, self.destination_sheet)
+        # Writer class object used to update existing file
+        destination_wb = Writer(self.destination_file_path, self.destination_sheet)
+        if add_missing_columns:
+            self.add_missing_columns(source, destination, destination_wb)
+            # again opening destination file as it is updated with the source file columns
+            destination = self.read_xlsx(self.destination_file_path, self.destination_sheet)
+
+        new_data = self._select_rows(source, filters)
+        if new_data:
+            # Resolve the destination column of every source column once, instead of searching the destination
+            # header row again for each cell. setdefault keeps the first occurrence of a repeated header.
+            first_column = {}
+            for col, header in enumerate((x.value for x in destination.row(0)), start=1):
+                first_column.setdefault(header, col)
+            # source column index -> destination column number (openpyxl numbering, starting from 1);
+            # source headers which are not present in destination simply get no entry and are skipped below
+            column_map = {
+                ind: first_column[x.value]
+                for ind, x in enumerate(source.row(0))
+                if x.value in first_column
+            }
+
+            row_num = destination.nrows  # used to maintain new row number
+            for data in new_data:  # iterating through the data to be written and writing them on the correct cells
+                row_num += 1
+                for ind, cell in enumerate(data):
+                    col = column_map.get(ind)
+                    if col is None:
+                        continue
+                    destination_wb.write(row_num, col, cell.value)
+        destination_wb.save()
+
+    def _select_rows(self, source, filters):
+        # Return the source data rows (header excluded) limited by self.lines and matching every filter.
+        # if number of rows to be considered from source is not predefined then take all; never read past the
+        # end of the sheet when self.lines is larger than the sheet itself
+        end = min(self.lines or source.nrows, source.nrows)
+        rows = [source.row(row) for row in range(1, end)]
+        if not filters:
+            return rows
+        headers = [x.value for x in source.row(0)]
+        for header, wanted in filters.items():
+            ind = headers.index(header)  # index of the filter header; ValueError if source has no such header
+            # Rebuild the list for every filter. Collecting rejected rows in one shared list and calling
+            # list.remove() on them failed with ValueError as soon as a second filter was supplied.
+            rows = [row for row in rows if row[ind].value == wanted]
+        return rows
+
+    def read_xlsx(self, path, sheet):
+        # reading xlsx file using xlrd and returning the sheet with the given name
+        wb = xlrd.open_workbook(path)
+        sht = wb.sheet_by_name(sheet)
+        return sht
+
+    def add_missing_columns(self, source, destination, destination_wb):
+        # looking for the headers which are not present in destination file and then updating destination file
+        # source / destination : xlrd sheets as returned by read_xlsx
+        # destination_wb       : Writer opened on the destination file; it is saved before returning
+        source_headers = [x.value for x in source.row(0)]
+        destination_headers = [x.value for x in destination.row(0)]
+        known = set(destination_headers)
+        col = len(destination_headers)  # new headers are appended after the last existing column
+        for header in source_headers:
+            if header in known:
+                continue
+            col += 1
+            destination_wb.write(1, col, header)
+            known.add(header)  # a header repeated in the source must not be added twice
+        destination_wb.save()

+ 2 - 0
requirements.txt

@@ -0,0 +1,2 @@
+xlrd3~=1.0.0
+openpyxl~=3.0.3

+ 1 - 0
xlsx_write.py

@@ -0,0 +1 @@
+