delete_rows does not work on deleting multiple rows

Issue #964 resolved
Troels Schwarz-Linnet
created an issue

Hi!

I updated to version 2.5.

I read in the CHANGES, that 2.5 had a Major Change "You can now insert and delete rows and columns in worksheets"

I found the documentation for this feature in: http://openpyxl.readthedocs.io/en/default/api/openpyxl.worksheet.worksheet.html

delete_cols(idx, amount=1)

Consider the following test code:

import shutil, sys
from distutils.version import StrictVersion

from openpyxl import Workbook
from openpyxl import load_workbook

# Test version
from openpyxl import __version__
test_version = StrictVersion(__version__) == StrictVersion("2.5.0")
if not test_version:
    print("You need to have openpyxl version 2.5.0. You have %s"%__version__)
    sys.exit()

# Create Workbook, Get the active shees
wb = Workbook()
ws = wb.active

ws.append(['Hello 1'])
ws.append(['Rem 1'])
ws.append(['Rem 2'])
ws.append(['Rem 3'])
ws.append(['Hello 2'])

wb.save('test_1.xlsx')
# Copy
shutil.copy2('test_1.xlsx', 'test_2.xlsx')

# Open workbook
wb = load_workbook('test_2.xlsx')
ws = wb.active

# Delete 1 row
#Work as expected. "Rem 1" is deleted 

# Delete 2 rows
#Work as expected. "Rem 1" and "Rem 2" is deleted 
#ws.delete_rows(2, 1)
#ws.delete_rows(2, 1)

# Delete 3 rows, and the same time
ws.delete_rows(2, 3)

#### Error, out is:
# Hello 1
# Hello 2
# Rem 2
# Rem 3

wb.save('test_2.xlsx')

If I check with the source code: http://openpyxl.readthedocs.io/en/default/_modules/openpyxl/worksheet/worksheet.html

There is this section

def delete_rows(self, idx, amount=1):
        """
        Delete row or rows from row==idx
        """
        self._move_cells(min_row=idx+amount, offset=-amount, row_or_col="row")

Which leads to

    def _move_cells(self, min_row=None, min_col=None, offset=0, row_or_col="row"):
        """
        Move either rows or columns around by the offset
        """
        reverse = offset > 0 # start at the end if moving down

        cells = sorted(self._cells.values(), key=attrgetter(row_or_col), reverse=reverse)

        for cell in cells:
            if min_row and cell.row < min_row:
                continue
            elif min_col and cell.col_idx < min_col:
                continue

            del self._cells[(cell.row, cell.col_idx)] # remove old ref

            val = getattr(cell, row_or_col)
            setattr(cell, row_or_col, val+offset) # calculate new coords

            self._cells[(cell.row, cell.col_idx)] = cell # add new ref

Comments (4)

  1. Troels Schwarz-Linnet reporter

    This is a dirty fix. But the height of the cells will still be the same

    import shutil, sys
    from distutils.version import StrictVersion
    
    from openpyxl import Workbook
    from openpyxl import load_workbook
    
    # Test version
    from openpyxl import __version__
    test_version = StrictVersion(__version__) == StrictVersion("2.5.0")
    if not test_version:
        print("You need to have openpyxl version 2.5.0. You have %s"%__version__)
        sys.exit()
    
    # Create Workbook, Get the active shees
    wb = Workbook()
    ws = wb.active
    
    ws.append(['Hello 1'])
    ws.append(['Rem 1'])
    ws.append(['Rem 2'])
    ws.append(['Rem 3'])
    ws.append(['Hello 2'])
    
    # Save & Copy
    wb.save('test_1.xlsx')
    shutil.copy2('test_1.xlsx', 'test_2.xlsx')
    
    # Open workbook
    wb = load_workbook('test_2.xlsx')
    ws = wb.active
    
    # Check for Rem and set height
    for iRow, cCol in enumerate(ws.rows):
        iRow += 1
        cCell = ws['A%i'%(iRow)]
        if cCell.value == None:
            continue
        elif "Rem" in cCell.value:
            # Set height property we can look for
            ws.row_dimensions[iRow].height = 0.1
    
    # Loop again, and look for height
    cont_loop = True
    while cont_loop:
        for iRow, cCol in enumerate(ws.rows):
            iRow += 1
            cCell = ws['A%i'%(iRow)]
            # Get the height
            height = ws.row_dimensions[iRow].height
    
            # Break at end
            if iRow == ws.max_row:
                cont_loop = False
            elif height == 0.1:
                # Delete
                ws.delete_rows(iRow, 1)
                break
    
    wb.save('test_2.xlsx')
    
  2. Troels Schwarz-Linnet reporter

    This also illustrates that row properties is not deleted

    import shutil, sys
    from distutils.version import StrictVersion
    
    from openpyxl import Workbook
    from openpyxl import load_workbook
    
    # Test version
    from openpyxl import __version__
    test_version = StrictVersion(__version__) == StrictVersion("2.5.0")
    if not test_version:
        print("You need to have openpyxl version 2.5.0. You have %s"%__version__)
        sys.exit()
    
    # Create Workbook, Get the active shees
    wb = Workbook()
    ws = wb.active
    
    ws.append(['Hello 1'])
    ws.append(['Rem 1'])
    ws.append(['Rem 2'])
    ws.append(['Rem 3'])
    ws.append(['Hello 2'])
    
    # Modify some properties
    ws.row_dimensions[1].height = 20
    ws.row_dimensions[2].height = 30
    ws.row_dimensions[3].height = 40
    ws.row_dimensions[4].height = 50
    ws.row_dimensions[5].height = 60
    
    # Save & Copy
    wb.save('test_1.xlsx')
    shutil.copy2('test_1.xlsx', 'test_2.xlsx')
    
    # Open workbook
    wb = load_workbook('test_2.xlsx')
    ws = wb.active
    
    # Check for Rem and set height
    for iRow, cCol in enumerate(ws.rows):
        iRow += 1
        cCell = ws['A%i'%(iRow)]
        if cCell.value == None:
            continue
        elif "Rem" in cCell.value:
            # Set height property we can look for
            cCell.value = "DELETE ME"
    
    # Loop again, and look for height
    cont_loop = True
    while cont_loop:
        for iRow, cCol in enumerate(ws.rows):
            iRow += 1
            cCell = ws['A%i'%(iRow)]
    
            # Break at end
            if iRow == ws.max_row:
                cont_loop = False
            elif cCell.value == "DELETE ME":
                # Delete
                ws.delete_rows(iRow, 1)
                break
    
    wb.save('test_2.xlsx')
    
  3. CharlieC

    Thanks for the report. I'm less concerned about the row/column dimension stuff, which is a separate issue. The problem with multiples seems to be related to when you want to delete more rows or columns than actually contained which causes a kind of wraparound. Maybe I should stop adding features? ;-)

  4. Log in to comment