|
@@ -1,4 +1,3 @@
|
|
|
-
|
|
|
from flask import Flask,request,jsonify,render_template,send_file,Response,redirect,url_for
|
|
|
import base64
|
|
|
from flask_bootstrap import Bootstrap
|
|
@@ -9,48 +8,135 @@ import PyPDF2
|
|
|
import difflib
|
|
|
import fitz
|
|
|
from zipfile import ZipFile
|
|
|
-
|
|
|
+import uuid
|
|
|
+import sys
|
|
|
+import os
|
|
|
|
|
|
# File extensions the upload endpoints accept (compared case-insensitively).
ALLOWED_EXTENSIONS = {'pdf'}

app = Flask(__name__)
Bootstrap(app)

# Key Flask uses to sign session cookies. Prefer the SECRET_KEY environment
# variable; the literal fallback keeps existing deployments working.
# NOTE(review): the fallback is a publicly known value -- set SECRET_KEY in
# any real deployment.
app.secret_key = os.environ.get('SECRET_KEY', '12345')
|
|
|
-app.run
|
|
|
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared in lower case.
    A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
-@app.route('/uploadajax', methods=['GET', 'POST'])
|
|
|
-def upload_file():
|
|
|
- sqliteConnection = sqlite3.connect('baaangt.db')
|
|
|
@app.route('/upload_reference_ajax', methods=['GET', 'POST'])
def upload_reference_file():
    """List, store, or delete reference PDFs kept in the ``reference_file`` table.

    GET
        Return the ``(UUID, reference_pdf_name)`` rows as JSON, or ``''`` as
        JSON when the query fails.
    POST with an uploaded ``reference`` file
        Store the file base64-encoded under a newly generated UUID and return
        ``[{'file_ref': <filename>, 'uuid': <uuid>}]``. A disallowed extension
        or a failure while storing is answered with HTTP 400.
    POST without files
        Delete the row whose UUID equals the ``uuid`` field of the JSON body
        (HTTP 400 when that field is missing), then render ``index.html``.
    """
    sqliteConnection = sqlite3.connect('baangt.db')
    try:
        cursor = sqliteConnection.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS reference_file (
                            UUID NOT NULL,
                            reference_pdf_name NOT NULL,
                            reference_pdf text NOT NULL
                            );''')

        if request.method != 'POST':
            try:
                cursor.execute('Select UUID, reference_pdf_name from reference_file')
                return jsonify(cursor.fetchall())
            except Exception:
                # Best effort: the listing falls back to an empty payload.
                app.logger.exception('Listing reference files failed')
                return jsonify('')

        if not request.files:
            payload = request.get_json(silent=True)
            if not isinstance(payload, dict) or 'uuid' not in payload:
                return Response("uuid is required", status=400, mimetype='application/json')
            # Bound parameter: the UUID comes straight from the client.
            cursor.execute('Delete from reference_file where UUID = ?',
                           (str(payload['uuid']),))
            sqliteConnection.commit()
            return render_template('index.html')

        file_ref = request.files['reference']
        new_uuid = str(uuid.uuid1().int)
        try:
            if not allowed_file(file_ref.filename):
                return Response("All fields must be selected", status=400, mimetype='application/json')
            blob_ref = base64.b64encode(file_ref.read())
            cursor.execute(""" INSERT INTO reference_file
                           (UUID,reference_pdf_name,reference_pdf) VALUES (?,?,?)""",
                           (new_uuid, file_ref.filename, blob_ref))
            sqliteConnection.commit()
        except Exception:
            app.logger.exception('Storing reference file failed')
            return Response("Error in uploading", status=400, mimetype='application/json')
        return jsonify([{'file_ref': file_ref.filename, 'uuid': new_uuid}])
    finally:
        # Runs on every exit path, so the connection is never leaked.
        sqliteConnection.close()
|
|
|
+
|
|
|
@app.route('/update_reference_ajax', methods=['GET', 'POST'])
def update_reference_file():
    """Replace the stored reference PDF of an existing UUID.

    A POST carrying files reads ``uuid`` from the form and the new PDF from
    the ``reference`` file field, overwrites ``reference_pdf_name`` and
    ``reference_pdf`` of the matching ``reference_file`` row, and returns
    ``[{'file_ref': <filename>, 'uuid': <uuid>}]``. A disallowed extension or
    a failure while updating is answered with HTTP 400. Every other request
    renders ``index.html``.
    """
    if request.method != 'POST' or not request.files:
        return render_template('index.html')

    target_uuid = str(request.form['uuid'])
    file_ref = request.files['reference']

    sqliteConnection = sqlite3.connect('baangt.db')
    try:
        if not allowed_file(file_ref.filename):
            return Response("All fields must be selected", status=400, mimetype='application/json')
        blob_ref = base64.b64encode(file_ref.read())
        cursor = sqliteConnection.cursor()
        cursor.execute(""" UPDATE reference_file
                       SET reference_pdf_name = :name , reference_pdf = :file
                       WHERE UUID = :uuid
                       """,
                       {'name': file_ref.filename, 'file': blob_ref, 'uuid': target_uuid})
        sqliteConnection.commit()
    except Exception:
        app.logger.exception('Updating reference file failed')
        return Response("Error in uploading", status=400, mimetype='application/json')
    finally:
        # Runs on every exit path, so the connection is never leaked.
        sqliteConnection.close()
    return jsonify([{'file_ref': file_ref.filename, 'uuid': target_uuid}])
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
@app.route('/upload_original_ajax', methods=['GET', 'POST'])
def upload_original_file():
    """List, store, or delete original PDFs kept in the ``original_file`` table.

    GET
        Return the ``(UUID, original_pdf_name)`` rows as JSON, or ``''`` as
        JSON when the query fails.
    POST with an uploaded ``original`` file
        Store the file base64-encoded under a newly generated UUID and return
        ``[{'file_orig': <filename>, 'uuid': <uuid>}]``. A disallowed extension
        or a failure while storing is answered with HTTP 400.
    POST without files
        Delete the row whose UUID equals the ``uuid`` field of the JSON body
        (HTTP 400 when that field is missing), then render ``index.html``.
    """
    sqliteConnection = sqlite3.connect('baangt.db')
    try:
        cursor = sqliteConnection.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS original_file (
                            UUID NOT NULL,
                            original_pdf_name NOT NULL,
                            original_pdf text NOT NULL
                            );''')

        if request.method != 'POST':
            try:
                cursor.execute('Select UUID, original_pdf_name from original_file')
                return jsonify(cursor.fetchall())
            except Exception:
                # Best effort: the listing falls back to an empty payload.
                app.logger.exception('Listing original files failed')
                return jsonify('')

        if not request.files:
            payload = request.get_json(silent=True)
            if not isinstance(payload, dict) or 'uuid' not in payload:
                return Response("uuid is required", status=400, mimetype='application/json')
            # Bound parameter: the UUID comes straight from the client.
            cursor.execute('Delete from original_file where UUID = ?',
                           (str(payload['uuid']),))
            sqliteConnection.commit()
            return render_template('index.html')

        file_orig = request.files['original']
        new_uuid = str(uuid.uuid1().int)
        try:
            if not allowed_file(file_orig.filename):
                return Response("All fields must be selected", status=400, mimetype='application/json')
            blob_orig = base64.b64encode(file_orig.read())
            cursor.execute(""" INSERT INTO original_file
                           (UUID,original_pdf_name,original_pdf) VALUES (?,?,?)""",
                           (new_uuid, file_orig.filename, blob_orig))
            sqliteConnection.commit()
        except Exception:
            app.logger.exception('Storing original file failed')
            return Response("Error in uploading", status=400, mimetype='application/json')
        return jsonify([{'file_orig': file_orig.filename, 'uuid': new_uuid}])
    finally:
        # Runs on every exit path, so the connection is never leaked.
        sqliteConnection.close()
|
|
|
+
|
|
|
+
|
|
|
@app.route('/')
def index():
    """Render ``index.html`` for the site root."""
    page = render_template('index.html')
    return page
|
|
|
def _fetch_pdf_bytes(cursor, sql, pdf_uuid):
    """Run *sql* with *pdf_uuid* bound as its only parameter and decode the PDF.

    Raises LookupError when no row matches.
    """
    cursor.execute(sql, (str(pdf_uuid),))
    row = cursor.fetchone()
    if row is None:
        raise LookupError('no stored PDF for UUID {}'.format(pdf_uuid))
    return base64.b64decode(row[0])


def _read_pdf_pages(path):
    """Return the extracted text of every page of the PDF at *path*, in order."""
    with open(path, 'rb') as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        return [reader.getPage(number).extractText()
                for number in range(reader.numPages)]


def _normalise_text(pages):
    """Join page texts into one single-spaced string and drop 'Seite' markers."""
    text = re.sub(' +', ' ', ''.join(pages).replace('\n', ' '))
    # Remove every 'Seite' token together with the three tokens after it
    # (presumably 'Seite x von y' page footers -- confirm against real input).
    while 'Seite' in text:
        start = text.index('Seite')
        marker = ' '.join(text[start:].split()[:4])
        stripped = text.replace(marker, '')
        if stripped == text:
            # The tokens are separated by something other than one space, so
            # the marker cannot be removed verbatim; stop instead of looping
            # forever.
            break
        text = stripped
    return text


def _changed_words(text_a, text_b):
    """Word-diff two texts; return (words only in a, words only in b) as strings."""
    removed, added = [], []
    for entry in difflib.Differ().compare(text_a.split(), text_b.split()):
        if entry[0] == '-':
            removed.append(entry[2:])
        elif entry[0] == '+':
            added.append(entry[2:])
    return ' '.join(removed), ' '.join(added)


def _blank_shared_runs(primary, secondary):
    """Blank out character runs longer than six that both strings share.

    Runs are the matching blocks of SequenceMatcher(primary, secondary), read
    from *primary*; the first occurrence of each is replaced by one space in
    both strings. Returns the reduced (primary, secondary) pair.
    """
    primary_out, secondary_out = primary, secondary
    matcher = difflib.SequenceMatcher(None, primary, secondary)
    for block in matcher.get_matching_blocks():
        run = primary[block.a:block.a + block.size]
        if len(run) > 6:
            primary_out = primary_out.replace(run, ' ', 1)
            secondary_out = secondary_out.replace(run, ' ', 1)
    return primary_out, secondary_out


def _highlight_first(page, word):
    """Highlight the first occurrence of *word* on *page*, if there is one."""
    for rect in page.searchFor(word):
        page.addHighlightAnnot(rect)
        break


def _highlight_words(doc, pages, words):
    """Highlight each of *words* on the first page, at or after the previous hit, that contains it.

    NOTE(review): the search resumes from the page of the previous hit, and a
    word found on no page no longer pushes the start past the last page
    (which used to disable highlighting for every later word) -- confirm this
    is the intended behaviour.
    """
    start = 0
    for word in words:
        for number in range(start, len(pages)):
            if word in pages[number]:
                _highlight_first(doc[number], word)
                start = number
                break

    # A leading all-digit token is additionally highlighted on every page.
    if words and words[0].isdigit():
        try:
            for number in range(len(pages)):
                _highlight_first(doc[number], words[0])
        except Exception:
            # Best effort: this extra pass must not abort the comparison.
            app.logger.exception('Highlighting leading number failed')


def _write_highlighted_copy(source_path, target_path, pages, words):
    """Save a copy of *source_path* at *target_path* with *words* highlighted."""
    doc = fitz.open(source_path)
    try:
        _highlight_words(doc, pages, words)
        doc.save(target_path, garbage=4, deflate=True, clean=True)
    finally:
        doc.close()


# Usage: GET /comparison?uuid1=<original_file UUID>&uuid2=<reference_file UUID>
@app.route('/comparison',methods=['POST', 'GET'])
def comparison_():
    """Compare a stored original PDF with a stored reference PDF.

    For a GET request, ``uuid1`` selects the row of ``original_file`` and
    ``uuid2`` the row of ``reference_file``. Both PDFs are written to
    ``temp/``, their extracted text is diffed word by word, the words unique
    to each side are highlighted in copies saved under ``output/``, and both
    copies are returned as the attachment ``output/output.zip``.

    Any failure redirects to the index page with the message
    ``'error in comparison'``. Non-GET requests redirect with
    ``'Request type not matched.'``.

    NOTE(review): the temp and output file names are fixed, so concurrent
    requests overwrite each other's files.
    """
    if request.method != 'GET':
        return redirect(url_for('.index',message = 'Request type not matched.'))

    input_file1 = 'temp/temp_orig.pdf'
    input_file2 = 'temp/temp_ref.pdf'
    output_file1 = 'output/Original_file.pdf'
    output_file2 = 'output/Reference_file.pdf'
    archive_path = 'output/output.zip'

    try:
        uuid1 = request.args.get('uuid1', None)
        uuid2 = request.args.get('uuid2', None)

        sqliteConnection = sqlite3.connect('baangt.db')
        try:
            cursor = sqliteConnection.cursor()
            # UUIDs come from the query string, so they are bound, not formatted.
            blob_orig = _fetch_pdf_bytes(
                cursor, 'Select original_pdf from original_file where UUID = ?', uuid1)
            blob_ref = _fetch_pdf_bytes(
                cursor, 'Select reference_pdf from reference_file where UUID = ?', uuid2)
        finally:
            sqliteConnection.close()

        os.makedirs('temp', exist_ok=True)
        os.makedirs('output', exist_ok=True)
        with open(input_file1, 'wb') as f:
            f.write(blob_orig)
        with open(input_file2, 'wb') as f:
            f.write(blob_ref)

        pages1 = _read_pdf_pages(input_file1)
        pages2 = _read_pdf_pages(input_file2)

        one_text, two_text = _changed_words(_normalise_text(pages1),
                                            _normalise_text(pages2))
        # Two passes, one seeded from each side, drop text that merely moved.
        one_text, two_text = _blank_shared_runs(one_text, two_text)
        two_text, one_text = _blank_shared_runs(two_text, one_text)

        _write_highlighted_copy(input_file1, output_file1, pages1, one_text.split())
        _write_highlighted_copy(input_file2, output_file2, pages2, two_text.split())

        with ZipFile(archive_path, 'w') as zipObj:
            zipObj.write(output_file1)
            zipObj.write(output_file2)
    except Exception:
        app.logger.exception('error in comparison')
        return redirect(url_for('.index',message = 'error in comparison'))

    # Absolute path: send the archive written above, on any platform.
    return send_file(os.path.abspath(archive_path), as_attachment=True)
|
|
|
# Start Flask's built-in server only when this file is executed directly.
if __name__ == '__main__':
    app.run()
|