Snippets

CharlieC Write performance benchmark

Updated by CharlieC

File snippet.BUILD Modified

  • Ignore whitespace
  • Hide word diff
 row_max = 1000
 col_max = 50
 sheets = 1
+factor = 0.1
 
 if len(sys.argv) > 1:
     row_max = int(sys.argv[1])
 if len(sys.argv) > 3:
     sheets = int(sys.argv[3])
 
+if len(sys.argv) > 4:
+    factor = float(sys.argv[4])
+
 
 def random_string():
     """
     chars = sample(ascii_letters, randint(3, 12))
     return "".join(chars)
 
+strings = []
+for r in range(int(sheets * row_max * factor)):
+    strings.append([random_string() for c in range(col_max)])
+
 def print_elapsed_time(module_name, elapsed, optimised=False):
     """ Print module run times in a consistent format. """
     if optimised:
 def time_xlsxwriter(optimised=False):
     """ Run XlsxWriter in optimised/constant memory mode. """
     options = {}
+    filename = 'xlsxwriter.xlsx'
     module_name = "xlsxwriter"
     if optimised:
         options['constant_memory'] = True
+        filename = "xlsxwriter_opt.xlsx"
 
     start_time = process_time()
-    filename = 'xlsxwriter.xlsx'
+
 
+    string_rows = iter(strings)
     workbook = xlsxwriter.Workbook(filename,
                                    options=options)
     for r in range(sheets):
 
         for row in range(row_max):
             if not row % 10:
-                data = [random_string() for col in range(col_max)]
+                data = next(string_rows)
             else:
                 data = [row + col for col in range(col_max)]
             worksheet.write_row(row, 0, data)
 def time_openpyxl(optimised=False):
     """ Run OpenPyXL in default mode. """
     module_name = "openpyxl"
+    filename = 'openpyxl.xlsx'
+    if optimised:
+        filename = 'openpyxl_opt.xlsx'
 
     start_time = process_time()
-    filename = 'openpyxl.xlsx'
+    string_rows = iter(strings)
 
     workbook = openpyxl.Workbook(write_only=optimised)
     for r in range(sheets):
 
         for row in range(row_max):
             if not row % 10:
-                data = (random_string() for col in range(col_max))
+                data = next(string_rows)
             else:
                 data = (row + col for col in range(col_max))
             worksheet.append(data)
 print("    Rows = %d" % row_max)
 print("    Cols = %d" % col_max)
 print("    Sheets = %d" % sheets)
+print("    Proportion text = %0.2f" % factor)
 print("")
 
 print("Times:")
Updated by CharlieC

File snippet.BUILD Modified

  • Ignore whitespace
  • Hide word diff
 #
 # python bench_excel_writers.py [num_rows] [num_cols]
 #
-# Copyright 2013-2015, John McNamara, jmcnamara@cpan.org
+# Copyright 2013-2018, John McNamara, Charlie Clark
 #
 import os
 import sys
     from time import process_time
 except ImportError:
     from time import clock as process_time
+from random import randint, sample
+from string import ascii_letters
 
 import openpyxl
 import xlsxwriter
 
 # Default to 1 sheet with 1000 rows x 50 cols
+# 10 % strings
 row_max = 1000
 col_max = 50
 sheets = 1
 
 if len(sys.argv) > 1:
     row_max = int(sys.argv[1])
-    sheets = int(sys.argv[3])
 
 if len(sys.argv) > 2:
     col_max = int(sys.argv[2])
     sheets = int(sys.argv[3])
 
 
+def random_string():
+    """
+    Return a random set of letters
+    """
+    chars = sample(ascii_letters, randint(3, 12))
+    return "".join(chars)
+
 def print_elapsed_time(module_name, elapsed, optimised=False):
     """ Print module run times in a consistent format. """
     if optimised:
         options['constant_memory'] = True
 
     start_time = process_time()
-    filename = 'xlsxwriter_opt.xlsx'
+    filename = 'xlsxwriter.xlsx'
 
     workbook = xlsxwriter.Workbook(filename,
                                    options=options)
     for r in range(sheets):
         worksheet = workbook.add_worksheet()
 
-        for row in range(0, row_max, 2):
-            string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
-            worksheet.write_row(row, 0, string_data)
-
-            num_data = [row + col for col in range(col_max)]
-            worksheet.write_row(row + 1, 0, num_data)
+        for row in range(row_max):
+            if not row % 10:
+                data = [random_string() for col in range(col_max)]
+            else:
+                data = [row + col for col in range(col_max)]
+            worksheet.write_row(row, 0, data)
 
     workbook.close()
 
     for r in range(sheets):
         worksheet = workbook.create_sheet()
 
-        for row in range(row_max // 2):
-
-            string_data = ("Row: %d Col: %d" % (row, col) for col in range(col_max))
-            worksheet.append(string_data)
-
-            num_data = (row + col for col in range(col_max))
-            worksheet.append(num_data)
+        for row in range(row_max):
+            if not row % 10:
+                data = (random_string() for col in range(col_max))
+            else:
+                data = (row + col for col in range(col_max))
+            worksheet.append(data)
 
     workbook.save(filename)
 
Updated by CharlieC

File snippet.BUILD Modified

  • Ignore whitespace
  • Hide word diff
     row_max = int(sys.argv[1])
     sheets = int(sys.argv[3])
 
-elif len(sys.argv) > 2:
+if len(sys.argv) > 2:
     col_max = int(sys.argv[2])
 
-elif len(sys.argv) > 3:
+if len(sys.argv) > 3:
     sheets = int(sys.argv[3])
 
 
Updated by CharlieC

File snippet.BUILD Modified

  • Ignore whitespace
  • Hide word diff
 #
 import os
 import sys
-from time import process_time
+try:
+    from time import process_time
+except ImportError:
+    from time import clock as process_time
 
 import openpyxl
 import xlsxwriter
 
+# Default to 1 sheet with 1000 rows x 50 cols
+row_max = 1000
+col_max = 50
+sheets = 1
 
-# Default to 1000 rows x 50 cols.
 if len(sys.argv) > 1:
     row_max = int(sys.argv[1])
-    col_max = 50
-else:
-    row_max = 1000
-    col_max = 50
+    sheets = int(sys.argv[3])
 
-if len(sys.argv) > 2:
+elif len(sys.argv) > 2:
     col_max = int(sys.argv[2])
 
+elif len(sys.argv) > 3:
+    sheets = int(sys.argv[3])
 
-def print_elapsed_time(module_name, elapsed):
+
+def print_elapsed_time(module_name, elapsed, optimised=False):
     """ Print module run times in a consistent format. """
+    if optimised:
+        module_name += " (optimised)"
     print("    %-22s: %6.2f" % (module_name, elapsed))
 
 
-def time_xlsxwriter():
-    """ Run XlsxWriter in default mode. """
-    start_time = process_time()
-    filename = 'xlsxwriter.xlsx'
-
-    workbook = xlsxwriter.Workbook(filename)
-    worksheet = workbook.add_worksheet()
-
-    for row in range(0, row_max, 2):
-        string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
-        worksheet.write_row(row, 0, string_data)
+def time_xlsxwriter(optimised=False):
+    """ Run XlsxWriter in optimised/constant memory mode. """
+    options = {}
+    module_name = "xlsxwriter"
+    if optimised:
+        options['constant_memory'] = True
 
-        num_data = [row + col for col in range(col_max)]
-        worksheet.write_row(row + 1, 0, num_data)
-
-    workbook.close()
-
-    elapsed = process_time() - start_time
-    print_elapsed_time('xlsxwriter', elapsed)
-    os.remove(filename)
-
-
-def time_xlsxwriter_optimised():
-    """ Run XlsxWriter in optimised/constant memory mode. """
     start_time = process_time()
     filename = 'xlsxwriter_opt.xlsx'
 
     workbook = xlsxwriter.Workbook(filename,
-                                   options={'constant_memory': True})
-    worksheet = workbook.add_worksheet()
+                                   options=options)
+    for r in range(sheets):
+        worksheet = workbook.add_worksheet()
 
-    for row in range(0, row_max, 2):
-        string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
-        worksheet.write_row(row, 0, string_data)
+        for row in range(0, row_max, 2):
+            string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
+            worksheet.write_row(row, 0, string_data)
 
-        num_data = [row + col for col in range(col_max)]
-        worksheet.write_row(row + 1, 0, num_data)
+            num_data = [row + col for col in range(col_max)]
+            worksheet.write_row(row + 1, 0, num_data)
 
     workbook.close()
 
     elapsed = process_time() - start_time
-    print_elapsed_time('xlsxwriter (optimised)', elapsed)
+    print_elapsed_time(module_name, elapsed, optimised)
     os.remove(filename)
 
 
-def time_openpyxl():
+def time_openpyxl(optimised=False):
     """ Run OpenPyXL in default mode. """
+    module_name = "openpyxl"
+
     start_time = process_time()
     filename = 'openpyxl.xlsx'
 
-    workbook = openpyxl.workbook.Workbook()
-    worksheet = workbook.active
+    workbook = openpyxl.Workbook(write_only=optimised)
+    for r in range(sheets):
+        worksheet = workbook.create_sheet()
 
-    for row in range(row_max // 2):
+        for row in range(row_max // 2):
 
-        string_data = ("Row: %d Col: %d" % (row, col) for col in range(col_max))
-        worksheet.append(string_data)
+            string_data = ("Row: %d Col: %d" % (row, col) for col in range(col_max))
+            worksheet.append(string_data)
 
-        num_data = (row + col for col in range(col_max))
-        worksheet.append(num_data)
+            num_data = (row + col for col in range(col_max))
+            worksheet.append(num_data)
 
     workbook.save(filename)
 
     elapsed = process_time() - start_time
-    print_elapsed_time('openpyxl', elapsed)
-    os.remove(filename)
-
-
-def time_openpyxl_optimised():
-    """ Run OpenPyXL in optimised mode. """
-    start_time = process_time()
-    filename = 'openpyxl_opt.xlsx'
-
-    workbook = openpyxl.workbook.Workbook(write_only=True)
-    worksheet = workbook.create_sheet()
-
-    for row in range(row_max // 2):
-        string_data = ("Row: %d Col: %d" % (row, col) for col in range(col_max))
-        worksheet.append(string_data)
-
-        num_data = (row + col for col in range(col_max))
-        worksheet.append(num_data)
-
-    workbook.save(filename)
-
-    elapsed = process_time() - start_time
-    print_elapsed_time('openpyxl   (optimised)', elapsed)
+    print_elapsed_time(module_name, elapsed, optimised)
     os.remove(filename)
 
 
 print("")
 print("Versions:")
-print("    %-12s: %s" % ('python', sys.version[:5]))
-print("    %-12s: %s" % ('openpyxl', openpyxl.__version__))
-print("    %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__))
+print("%s: %s" % ('python', sys.version[:5]))
+print("%s: %s" % ('openpyxl', openpyxl.__version__))
+print("%s: %s" % ('xlsxwriter', xlsxwriter.__version__))
 print("")
 
 print("Dimensions:")
 print("    Rows = %d" % row_max)
 print("    Cols = %d" % col_max)
+print("    Sheets = %d" % sheets)
 print("")
 
 print("Times:")
-time_xlsxwriter_optimised()
 time_xlsxwriter()
-time_openpyxl_optimised()
+time_xlsxwriter(optimised=True)
 time_openpyxl()
+time_openpyxl(optimised=True)
 print("")
Created by CharlieC

File snippet.BUILD Added

  • Ignore whitespace
  • Hide word diff
+##############################################################################
+#
+# Simple Python program to benchmark several Python Excel writing modules.
+#
+# python bench_excel_writers.py [num_rows] [num_cols]
+#
+# Copyright 2013-2015, John McNamara, jmcnamara@cpan.org
+#
+import os
+import sys
+from time import process_time
+
+import openpyxl
+import xlsxwriter
+
+
+# Default to 1000 rows x 50 cols.
+if len(sys.argv) > 1:
+    row_max = int(sys.argv[1])
+    col_max = 50
+else:
+    row_max = 1000
+    col_max = 50
+
+if len(sys.argv) > 2:
+    col_max = int(sys.argv[2])
+
+
+def print_elapsed_time(module_name, elapsed):
+    """ Print module run times in a consistent format. """
+    print("    %-22s: %6.2f" % (module_name, elapsed))
+
+
+def time_xlsxwriter():
+    """ Run XlsxWriter in default mode. """
+    start_time = process_time()
+    filename = 'xlsxwriter.xlsx'
+
+    workbook = xlsxwriter.Workbook(filename)
+    worksheet = workbook.add_worksheet()
+
+    for row in range(0, row_max, 2):
+        string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
+        worksheet.write_row(row, 0, string_data)
+
+        num_data = [row + col for col in range(col_max)]
+        worksheet.write_row(row + 1, 0, num_data)
+
+    workbook.close()
+
+    elapsed = process_time() - start_time
+    print_elapsed_time('xlsxwriter', elapsed)
+    os.remove(filename)
+
+
+def time_xlsxwriter_optimised():
+    """ Run XlsxWriter in optimised/constant memory mode. """
+    start_time = process_time()
+    filename = 'xlsxwriter_opt.xlsx'
+
+    workbook = xlsxwriter.Workbook(filename,
+                                   options={'constant_memory': True})
+    worksheet = workbook.add_worksheet()
+
+    for row in range(0, row_max, 2):
+        string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
+        worksheet.write_row(row, 0, string_data)
+
+        num_data = [row + col for col in range(col_max)]
+        worksheet.write_row(row + 1, 0, num_data)
+
+    workbook.close()
+
+    elapsed = process_time() - start_time
+    print_elapsed_time('xlsxwriter (optimised)', elapsed)
+    os.remove(filename)
+
+
+def time_openpyxl():
+    """ Run OpenPyXL in default mode. """
+    start_time = process_time()
+    filename = 'openpyxl.xlsx'
+
+    workbook = openpyxl.workbook.Workbook()
+    worksheet = workbook.active
+
+    for row in range(row_max // 2):
+
+        string_data = ("Row: %d Col: %d" % (row, col) for col in range(col_max))
+        worksheet.append(string_data)
+
+        num_data = (row + col for col in range(col_max))
+        worksheet.append(num_data)
+
+    workbook.save(filename)
+
+    elapsed = process_time() - start_time
+    print_elapsed_time('openpyxl', elapsed)
+    os.remove(filename)
+
+
+def time_openpyxl_optimised():
+    """ Run OpenPyXL in optimised mode. """
+    start_time = process_time()
+    filename = 'openpyxl_opt.xlsx'
+
+    workbook = openpyxl.workbook.Workbook(write_only=True)
+    worksheet = workbook.create_sheet()
+
+    for row in range(row_max // 2):
+        string_data = ("Row: %d Col: %d" % (row, col) for col in range(col_max))
+        worksheet.append(string_data)
+
+        num_data = (row + col for col in range(col_max))
+        worksheet.append(num_data)
+
+    workbook.save(filename)
+
+    elapsed = process_time() - start_time
+    print_elapsed_time('openpyxl   (optimised)', elapsed)
+    os.remove(filename)
+
+
+print("")
+print("Versions:")
+print("    %-12s: %s" % ('python', sys.version[:5]))
+print("    %-12s: %s" % ('openpyxl', openpyxl.__version__))
+print("    %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__))
+print("")
+
+print("Dimensions:")
+print("    Rows = %d" % row_max)
+print("    Cols = %d" % col_max)
+print("")
+
+print("Times:")
+time_xlsxwriter_optimised()
+time_xlsxwriter()
+time_openpyxl_optimised()
+time_openpyxl()
+print("")