Snippets

openpyxl Reading multiple worksheets in parallel

Updated by CharlieC

File snippet.BUILD Modified

  • Ignore whitespace
  • Hide word diff
                        keep_links=False, data_only=True)
     print("    Workbook loaded {0:.2f}s".format(perf_counter() - begin))
     sheets = wb.sheetnames
-    with ProcessPoolExecutor(2) as pool:
+    with ProcessPoolExecutor() as pool:
         for ws in pool.map(parallel_worksheet, sheets):
             pass
     end = perf_counter()
Created by CharlieC

File snippet.BUILD Added

  • Ignore whitespace
  • Hide word diff
+from concurrent.futures import ProcessPoolExecutor
+from openpyxl import load_workbook
+from time import perf_counter
+
+test_file = "Issues/bug494.xlsx"  # workbook path opened by parallel_read and by each parallel_worksheet call
+
+
+def parallel_worksheet(sheetname):
+    begin = perf_counter()
+    wb = load_workbook(test_file, read_only=True,
+                       data_only=True, keep_links=False)
+    ws = wb[sheetname]
+    for row in ws.iter_rows(values_only=True):
+        for value in row:
+            pass
+    end = perf_counter()
+    print("    {0} {1:.2f}s".format(sheetname, end - begin))
+
+
+def parallel_read():
+    print("Parallised Read")
+    begin = perf_counter()
+    wb = load_workbook(test_file, read_only=True,
+                       keep_links=False, data_only=True)
+    print("    Workbook loaded {0:.2f}s".format(perf_counter() - begin))
+    sheets = wb.sheetnames
+    with ProcessPoolExecutor(2) as pool:
+        for ws in pool.map(parallel_worksheet, sheets):
+            pass
+    end = perf_counter()
+    print("    Total time {0:.2f}s".format(end - begin))
+
+
+if __name__ == "__main__":  # run the benchmark only when executed as a script, not on import
+    parallel_read()