Commits

Marko Loparic  committed 09f8e8b

[scripts/plantrisk] execfile limitation solved by splitting the script

  • Participants
  • Parent commits 28662e5

Comments (0)

Files changed (3)

File scripts/plantrisk/__init__.py

Empty file added.

File scripts/plantrisk/run_scenarios.py

-import openpyxl
-from openpyxl.cell import (get_column_letter, column_index_from_string, 
-        coordinate_from_string)
 import os
-import os.path as osp
-import time
-import itertools
-import shutil
+import sys
 
-# XXXX from launch import launch
-from launch_uncompiled import launch
+# trick for python -u
+unbuffered = os.fdopen(sys.stdout.fileno(), 'w', 0)
+sys.stdout.close()
+sys.stdout = unbuffered
 
-SCENARIO_COLUMN = 'business scenario'
-HEADER_ROW_ID = 1  # second row (doesn't work for market sheets!)
-MIN_ROWS = 5  # EL_ECP for instance has 5 lines
-SCENARIO_NAME_TABLE = '1.ScenMgt!F24'
-OUTPUT_SUBDIR = 'Output'
+# is there a way to avoid this ugly hack
+if os.path.basename(sys.executable).lower() not in ('python', 'python.exe'):
+    script_path = os.path.abspath(sys.argv[0])
+    p = os.path.dirname
+    package_parent_dir = p(p(p(p(script_path))))
+    sys.path.insert(0, package_parent_dir)
+print 'sys.path =', sys.path
 
-# some sheets need a specific treatment because there is data after empty lines
-NON_SCENARIO_SHEETS = ['1.StudyInit', '1.ScenMgt', 'Settings']
-MIN_ROWS_NON_SCENARIO = 300  # probably safe...
+from limma.scripts.plantrisk.run_scenarios_functions import run_scenarios
 
-class Scenario(object):
-    def __init__(self, name, base_dir):
-        self.name = name
-        self.path = osp.join(base_dir, '%s.xlsx' % name)
-        self.run_dir = osp.join(base_dir, name)
-        self.output_dir = osp.join(self.run_dir, OUTPUT_SUBDIR)
-
-def run_scenarios(wb_path, run_dir):
-
-    t1 = time.time()
-
-    if osp.exists(run_dir):
-        shutil.rmtree(run_dir)
-    os.mkdir(run_dir)
-    
-    print "opening source workbook ...",
-    wb = openpyxl.load_workbook(wb_path, True)
-    print "took %.2fs" % (time.time() - t1)
-
-    scenario_names = get_scenario_names(wb)
-    if len(scenario_names) == 0:
-        raise ValueError("No business scenario found in table starting at %s"
-                         % SCENARIO_NAME_TABLE)
-
-    scenarios = []
-    for name in scenario_names:
-        scen = Scenario(name, run_dir)
-        scenarios.append(scen)
-
-    split_workbooks(wb, scenarios)
-
-    for scen in scenarios:
-        print 
-        print "=" * 80
-        print "Executing PlantRisk for scenario", scen.name
-        print "=" * 80
-        launch(scen.path, scen.run_dir)
-
-    t2 = time.time()
-    print 
-    print "=" * 80
-    print "Merging csv files"
-    print "=" * 80
-    merge_csv_files(scenarios, run_dir)
-    print "csv merge took %.2fs" % (time.time() - t2)
-
-    print
-    print "total time %.2fs" % (time.time() - t1)
-
-def split_workbooks(wb, scenarios):
-    t1 = time.time()
-    for scen in scenarios:
-        new_wb = clone_workbook(wb, scen.name)
-        print "saving new workbook for scenario %s" % scen.name
-        new_wb.save(scen.path)
-    print "split time: %.2fs" % (time.time() - t1)
-
-def get_scenario_names(wb):
-    ws_name, address = SCENARIO_NAME_TABLE.split('!')
-    ws = wb.get_sheet_by_name(ws_name)
-
-    column, row = coordinate_from_string(address)
-    next_column = get_column_letter(column_index_from_string(column) + 2)
-    lookup_range = '%s%s:%s%s' % (column, row, next_column,
-                                  ws.get_highest_row()) 
-
-    scenario_names = []
-    for row in ws.iter_rows(range_string=lookup_range):
-        name, active = row
-        if (active.internal_value is not None
-            and active.internal_value.lower() == 'yes'):
-            scenario_names.append(name.internal_value)
-
-    return scenario_names
-
-def clone_workbook(wb, scenario_name):
-    new_wb = openpyxl.Workbook(True)
-    for worksheet in wb.worksheets:
-        t1 = time.time()
-        new_worksheet = new_wb.create_sheet(title=worksheet.title)
-        filter_column = None
-        filter_index = None
-        added_lines = 0
-
-        headers_row = list(itertools.islice(
-            worksheet.iter_rows(), HEADER_ROW_ID, HEADER_ROW_ID + 1))[0]
-        for cell in headers_row:
-            if cell.internal_value is not None:
-                if cell.internal_value.lower() == SCENARIO_COLUMN:
-                    filter_column = cell.column
-                    filter_index = column_index_from_string(filter_column) - 1
-                    print("[%s] sheet %s has filter column %s"
-                          % (scenario_name, worksheet.title, cell.column))
-
-        # compute where to stop when finding an empty line
-        if worksheet.title in NON_SCENARIO_SHEETS:
-            min_rows = MIN_ROWS_NON_SCENARIO
-        else:
-            min_rows = MIN_ROWS
-        
-        for row_id, row in enumerate(worksheet.iter_rows()):
-            values = [c.internal_value for c in row]
-            if row[0].column != 'A':
-                # openpyxl sometimes filters out the empty A column (openpyxl
-                # bug)
-                values.insert(0, None)
-
-            if row_id > min_rows and all([c is None for c in values]):
-                break
-
-            # skip line if:
-            # - a scenario column exists, and
-            # - we are after the header, and
-            # - there is a scenario assigned to the line, and
-            # - this scenario is not the good one
-            if filter_column is not None:
-                if row_id > HEADER_ROW_ID:
-                    name = row[filter_index].internal_value
-                    if name is not None and name != scenario_name:
-                        continue
-
-            new_worksheet.append(values)
-            added_lines += 1
-
-        print("[%s] added sheet %s with %d lines in %.2fs"
-              % (scenario_name, worksheet.title, added_lines,
-                 (time.time() - t1)))
-
-    t1 = time.time()
-    copy_ranges(wb, new_wb)
-    print("[%s] copied ranges in %.2fs" % (scenario_name, time.time() - t1))
-    return new_wb
-
-def copy_ranges(w1, w2):
-    for named_range in w1.get_named_ranges():
-        name = named_range.name
-        new_destinations = []
-        for (ws, rng) in named_range.destinations:
-            new_ws = w2.get_sheet_by_name(ws.title)
-            if not new_ws:
-                raise ValueError("cannot find sheet %s for which named range "
-                                 "%s is defined" % (ws.title, name))
-            new_destinations.append((new_ws, rng))
-        # print "creating named range %s" % name
-        nr = openpyxl.namedrange.NamedRange(name, new_destinations)
-        w2.add_named_range(nr)
-
-def merge_csv_files(scenarios, run_dir):
-
-
-    first_output_dir = scenarios[0].output_dir
-    csv_fnames = [f for f in os.listdir(first_output_dir)
-                  if f.lower().endswith('.csv')]
-    
-    output_dir = osp.join(run_dir, OUTPUT_SUBDIR)
-    os.mkdir(output_dir)
-    
-    first_scenario = True
-    for scen in scenarios:
-        print scen.name
-        for csv_fname in csv_fnames:
-            print ' ', csv_fname
-            csv_path = osp.join(scen.output_dir, csv_fname)
-            copy_lines(csv_path, csv_fname, output_dir, scen.name,
-                       first_scenario)
-        first_scenario = False
-
-def copy_lines(path_in, fname, output_dir, scen_name, first_scenario):
-    
-    i_line = 0
-    path_out = osp.join(output_dir, fname)
-    with open(path_in) as f_in:
-        with open(path_out, 'a') as f_out:
-            while True:
-
-                # get first line and guess the separator
-                line = f_in.readline()
-                i_line += 1
-                if len(line) == 0:
-                    break
-
-                if i_line == 1:
-                    # - first line: add header
-                    count_comma = line.count(',')
-                    count_semicolon = line.count(';')
-                    if count_comma > count_semicolon:
-                        sep = ','
-                    elif count_semicolon > count_comma:
-                        sep = ';'
-                    else:
-                        raise ValueError("can't guess the list separator of "
-                                         "file %s" % path_in)
-                    line_out = 'BUSINESS_SCENARIO' + sep + line
-                else:
-                    # other lines: add the scenario name
-                    if line.strip() == '':
-                        # tolerate empty line
-                        line_out = line
-                    else:
-                        # XXX: this old code removed the price scenario)
-                        #pos = line.find(sep)
-                        #if pos == -1:
-                        #    raise ValueError("List separator '%s' not found"
-                        #                     "line number %d of file %s"
-                        #                     "(%s)"
-                        #                     % (sep, i_line, fname, line))
-                        #line_out = scen_name + line[pos:]
-                        line_out = scen_name + sep + line
-
-                # skip if first line and not in first file
-                # TODO: adapt for 2-line header!
-                if i_line == 1 and not first_scenario:
-                    continue
-                
-                # write line
-                f_out.write(line_out)
-
-if __name__ == '__main__':
-    import sys
-    _wb_path, _run_dir = sys.argv[1:]
-    run_scenarios(_wb_path, _run_dir)
+wb_path, run_dir = sys.argv[1:]
+run_scenarios(wb_path, run_dir)

File scripts/plantrisk/run_scenarios_functions.py

+import os
+import os.path as osp
+import time
+import itertools
+import shutil
+import sys
+import openpyxl
+from openpyxl.cell import (get_column_letter, column_index_from_string, 
+        coordinate_from_string)
+
+from limma.scripts.plantrisk.launch import launch
+#from limma.scripts.plantrisk.launch_uncompiled import launch
+
+SCENARIO_COLUMN = 'business scenario'
+HEADER_ROW_ID = 1  # second row (doesn't work for market sheets!)
+MIN_ROWS = 5  # EL_ECP for instance has 5 lines
+SCENARIO_NAME_TABLE = '1.ScenMgt!F24'
+OUTPUT_SUBDIR = 'Output'
+
+# some sheets need a specific treatment because there is data after empty lines
+NON_SCENARIO_SHEETS = ['1.StudyInit', '1.ScenMgt', 'Settings']
+MIN_ROWS_NON_SCENARIO = 300  # probably safe...
+
+class Scenario(object):
+    def __init__(self, name, base_dir):
+        self.name = name
+        self.path = osp.join(base_dir, '%s.xlsx' % name)
+        self.run_dir = osp.join(base_dir, name)
+        self.output_dir = osp.join(self.run_dir, OUTPUT_SUBDIR)
+
+def run_scenarios(wb_path, run_dir):
+
+    t1 = time.time()
+
+    if osp.exists(run_dir):
+        shutil.rmtree(run_dir)
+    os.mkdir(run_dir)
+    
+    print "opening source workbook ...",
+    wb = openpyxl.load_workbook(wb_path, True)
+    print "took %.2fs" % (time.time() - t1)
+
+    scenario_names = get_scenario_names(wb)
+    if len(scenario_names) == 0:
+        raise ValueError("No business scenario found in table starting at %s"
+                         % SCENARIO_NAME_TABLE)
+
+    scenarios = []
+    for name in scenario_names:
+        scen = Scenario(name, run_dir)
+        scenarios.append(scen)
+
+    split_workbooks(wb, scenarios)
+
+    for scen in scenarios:
+        print 
+        print "=" * 80
+        print "Executing PlantRisk for scenario", scen.name
+        print "=" * 80
+        launch(scen.path, scen.run_dir)
+
+    t2 = time.time()
+    print 
+    print "=" * 80
+    print "Merging csv files"
+    print "=" * 80
+    merge_csv_files(scenarios, run_dir)
+    print "csv merge took %.2fs" % (time.time() - t2)
+
+    print
+    print "total time %.2fs" % (time.time() - t1)
+
+def split_workbooks(wb, scenarios):
+    t1 = time.time()
+    for scen in scenarios:
+        new_wb = clone_workbook(wb, scen.name)
+        print "saving new workbook for scenario %s" % scen.name
+        new_wb.save(scen.path)
+    print "split time: %.2fs" % (time.time() - t1)
+
+def get_scenario_names(wb):
+    ws_name, address = SCENARIO_NAME_TABLE.split('!')
+    ws = wb.get_sheet_by_name(ws_name)
+
+    column, row = coordinate_from_string(address)
+    next_column = get_column_letter(column_index_from_string(column) + 2)
+    lookup_range = '%s%s:%s%s' % (column, row, next_column,
+                                  ws.get_highest_row()) 
+
+    scenario_names = []
+    for row in ws.iter_rows(range_string=lookup_range):
+        name, active = row
+        if (active.internal_value is not None
+            and active.internal_value.lower() == 'yes'):
+            scenario_names.append(name.internal_value)
+
+    return scenario_names
+
+def clone_workbook(wb, scenario_name):
+    new_wb = openpyxl.Workbook(True)
+    for worksheet in wb.worksheets:
+        t1 = time.time()
+        new_worksheet = new_wb.create_sheet(title=worksheet.title)
+        filter_column = None
+        filter_index = None
+        added_lines = 0
+
+        headers_row = list(itertools.islice(
+            worksheet.iter_rows(), HEADER_ROW_ID, HEADER_ROW_ID + 1))[0]
+        for cell in headers_row:
+            if cell.internal_value is not None:
+                if cell.internal_value.lower() == SCENARIO_COLUMN:
+                    filter_column = cell.column
+                    filter_index = column_index_from_string(filter_column) - 1
+                    print("[%s] sheet %s has filter column %s"
+                          % (scenario_name, worksheet.title, cell.column))
+
+        # compute where to stop when finding an empty line
+        if worksheet.title in NON_SCENARIO_SHEETS:
+            min_rows = MIN_ROWS_NON_SCENARIO
+        else:
+            min_rows = MIN_ROWS
+        
+        for row_id, row in enumerate(worksheet.iter_rows()):
+            values = [c.internal_value for c in row]
+            if row[0].column != 'A':
+                # openpyxl sometimes filters out the empty A column (openpyxl
+                # bug)
+                values.insert(0, None)
+
+            if row_id > min_rows and all([c is None for c in values]):
+                break
+
+            # skip line if:
+            # - a scenario column exists, and
+            # - we are after the header, and
+            # - there is a scenario assigned to the line, and
+            # - this scenario is not the good one
+            if filter_column is not None:
+                if row_id > HEADER_ROW_ID:
+                    name = row[filter_index].internal_value
+                    if name is not None and name != scenario_name:
+                        continue
+
+            new_worksheet.append(values)
+            added_lines += 1
+
+        print("[%s] added sheet %s with %d lines in %.2fs"
+              % (scenario_name, worksheet.title, added_lines,
+                 (time.time() - t1)))
+
+    t1 = time.time()
+    copy_ranges(wb, new_wb)
+    print("[%s] copied ranges in %.2fs" % (scenario_name, time.time() - t1))
+    return new_wb
+
+def copy_ranges(w1, w2):
+    for named_range in w1.get_named_ranges():
+        name = named_range.name
+        new_destinations = []
+        for (ws, rng) in named_range.destinations:
+            new_ws = w2.get_sheet_by_name(ws.title)
+            if not new_ws:
+                raise ValueError("cannot find sheet %s for which named range "
+                                 "%s is defined" % (ws.title, name))
+            new_destinations.append((new_ws, rng))
+        # print "creating named range %s" % name
+        nr = openpyxl.namedrange.NamedRange(name, new_destinations)
+        w2.add_named_range(nr)
+
+def merge_csv_files(scenarios, run_dir):
+
+
+    first_output_dir = scenarios[0].output_dir
+    csv_fnames = [f for f in os.listdir(first_output_dir)
+                  if f.lower().endswith('.csv')]
+    
+    output_dir = osp.join(run_dir, OUTPUT_SUBDIR)
+    os.mkdir(output_dir)
+    
+    first_scenario = True
+    for scen in scenarios:
+        print scen.name
+        for csv_fname in csv_fnames:
+            print ' ', csv_fname
+            csv_path = osp.join(scen.output_dir, csv_fname)
+            copy_lines(csv_path, csv_fname, output_dir, scen.name,
+                       first_scenario)
+        first_scenario = False
+
+def copy_lines(path_in, fname, output_dir, scen_name, first_scenario):
+    
+    i_line = 0
+    path_out = osp.join(output_dir, fname)
+    with open(path_in) as f_in:
+        with open(path_out, 'a') as f_out:
+            while True:
+
+                # get first line and guess the separator
+                line = f_in.readline()
+                i_line += 1
+                if len(line) == 0:
+                    break
+
+                if i_line == 1:
+                    # - first line: add header
+                    count_comma = line.count(',')
+                    count_semicolon = line.count(';')
+                    if count_comma > count_semicolon:
+                        sep = ','
+                    elif count_semicolon > count_comma:
+                        sep = ';'
+                    else:
+                        raise ValueError("can't guess the list separator of "
+                                         "file %s" % path_in)
+                    line_out = 'BUSINESS_SCENARIO' + sep + line
+                else:
+                    # other lines: add the scenario name
+                    if line.strip() == '':
+                        # tolerate empty line
+                        line_out = line
+                    else:
+                        # XXX: this old code removed the price scenario)
+                        #pos = line.find(sep)
+                        #if pos == -1:
+                        #    raise ValueError("List separator '%s' not found"
+                        #                     "line number %d of file %s"
+                        #                     "(%s)"
+                        #                     % (sep, i_line, fname, line))
+                        #line_out = scen_name + line[pos:]
+                        line_out = scen_name + sep + line
+
+                # skip if first line and not in first file
+                # TODO: adapt for 2-line header!
+                if i_line == 1 and not first_scenario:
+                    continue
+                
+                # write line
+                f_out.write(line_out)
+
+if __name__ == '__main__':
+    _wb_path, _run_dir = sys.argv[1:]
+    run_scenarios(_wb_path, _run_dir)