How does the automated test parse the excel file?

foreword

In automated testing, we store data nothing more than using files or databases, so the files can be csv, xlsx, xml, or even txt files. Usually, excel files are often our first choice. Whether it is writing test cases or storing test data, excel is Very convenient. So today, we will summarize the methods of processing excel files in different modules, and directly encapsulate them, so that we can use them directly in the future and increase work efficiency.

openpyxl
openpyxl is a third-party library. First, we use the command pip install openpyxl to install it directly

Note: When openpyxl operates excel, the row number and column number are calculated from 1

package code

from openpyxl import load_workbook
from openpyxl.styles import Font
from openpyxl.styles.colors import BLACK
from collections import namedtuple


class ParseExcel(object):
    """Parse excel document"""

    def __init__(self, filename, sheet_name=None):
        try:
            self.filename = filename
            self.sheet_name = sheet_name
            self.wb = load_workbook(self.filename)
            if self.sheet_name is None:
                self.work_sheet = self.wb.active
            else:
                self.work_sheet = self.wb[self.sheet_name]
        except FileNotFoundError as e:
            raise e

    def get_max_row_num(self):
        """get max line number"""
        max_row_num = self.work_sheet.max_row
        return max_row_num

    def get_max_column_num(self):
        """get maximum column number"""
        max_column = self.work_sheet.max_column
        return max_column

    def get_cell_value(self, coordinate=None, row=None, column=None):
        """Get the data of the specified cell"""
        if coordinate is not None:
            try:
                return self.work_sheet[coordinate].value
            except Exception as e:
                raise e
        elif coordinate is None and row is not None and column is not None:
            if isinstance(row, int) and isinstance(column, int):
                return self.work_sheet.cell(row=row, column=column).value
            else:
                raise TypeError('row and column must be type int')
        else:
            raise Exception("Insufficient Coordinate of cell!")

    def get_row_value(self, row):
        """Get data for a row"""
        column_num = self.get_max_column_num()
        row_value = []
        if isinstance(row, int):
            for column in range(1, column_num + 1):
                values_row = self.work_sheet.cell(row, column).value
                row_value.append(values_row)
            return row_value
        else:
            raise TypeError('row must be type int')

    def get_column_value(self, column):
        """Get a column of data"""
        row_num = self.get_max_column_num()
        column_value = []
        if isinstance(column, int):
            for row in range(1, row_num + 1):
                values_column = self.work_sheet.cell(row, column).value
                column_value.append(values_column)
            return column_value
        else:
            raise TypeError('column must be type int')

    def get_all_value_1(self):
        """Get all data of the specified form(remove header)"""
        max_row_num = self.get_max_row_num()
        max_column = self.get_max_column_num()
        values = []
        for row in range(2, max_row_num + 1):
            value_list = []
            for column in range(1, max_column + 1):
                value = self.work_sheet.cell(row, column).value
                value_list.append(value)
            values.append(value_list)
        return values

    def get_all_value_2(self):
        """Get all data of the specified form(remove header)"""
        rows_obj = self.work_sheet.iter_rows(min_row=2, max_row=self.work_sheet.max_row,
                                             values_only=True)  # Specifying values_only will directly extract data without using cell().value
        values = []
        for row_tuple in rows_obj:
            value_list = []
            for value in row_tuple:
                value_list.append(value)
            values.append(value_list)
        return values

    def get_excel_title(self):
        """Obtain sheet header"""
        title_key = tuple(self.work_sheet.iter_rows(max_row=1, values_only=True))[0]
        return title_key

    def get_listdict_all_value(self):
        """Get all data, return a list of nested dictionaries"""
        sheet_title = self.get_excel_title()
        all_values = self.get_all_value_2()
        value_list = []
        for value in all_values:
            value_list.append(dict(zip(sheet_title, value)))
        return value_list

    def get_list_nametuple_all_value(self):
        """Get all data, return a list of nested named tuples"""
        sheet_title = self.get_excel_title()
        values = self.get_all_value_2()

        excel = namedtuple('excel', sheet_title)
        value_list = []
        for value in values:
            e = excel(*value)
            value_list.append(e)
        return value_list

    def write_cell(self, row, column, value=None, bold=True, color=BLACK):
        """
        Write data to the specified cell
        :param work_sheet:
        :param row: line number
        :param column: column number
        :param value: data to be written
        :param bold: bold, default bold
        :param color: font color, default black
        :return:
        """
        try:
            if isinstance(row, int) and isinstance(column, int):
                cell_obj = self.work_sheet.cell(row, column)
                cell_obj.font = Font(color=color, bold=bold)
                cell_obj.value = value
                self.wb.save(self.filename)
            else:
                raise TypeError('row and column must be type int')
        except Exception as e:
            raise e

if __name__ == '__main__':
    pe = ParseExcel('testdata.xlsx')
    # sheet = pe.get_sheet_object('testcase')

    column_row = pe.get_max_column_num()
    print('maximum column number:', column_row)
    max_row = pe.get_max_row_num()
    print('maximum line number:', max_row)
    #
    cell_value_1 = pe.get_cell_value(row=2, column=3)
    print('the first%d OK, the first%d The column data is: %s' % (2, 3, cell_value_1))

    cell_value_2 = pe.get_cell_value(coordinate='A5')
    print('A5 The cell data is: {}'.format(cell_value_2))

    value_row = pe.get_row_value(3)
    print('the first{}The row data is:{}'.format(3, value_row))

    value_column = pe.get_column_value(2)
    print('the first{}The column data is:{}'.format(2, value_column))
    #
    values_1 = pe.get_all_value_1()
    print('The first way to get all the data\n', values_1)

    values_2 = pe.get_all_value_2()
    print('The second way to get all the data\n', values_2)

    title = pe.get_excel_title()
    print('The header is\n{}'.format(title))

    dict_value = pe.get_listdict_all_value()
    print('a list of nested dictionaries of all data:\n', dict_value)
    #
    namedtuple_value = pe.get_list_nametuple_all_value()
    print('a list of nested named tuples of all data:\n', namedtuple_value)

    pe.write_cell(1, 2, 'Tc_title')

xlrd
Install xlrd, this module only supports read operations. If you want to write, you need to use xlwt or use xlutils to cooperate with xlrd, but using xlwt can only write new excel files, not the original files, so the choice here is to use xlutils

But there is still a problem, if we use xlutils, then our excel file needs to be suffixed with .xls. Because it cannot be written with xlsx as the suffix, an error will be reported (pro-test, because the formatting_info parameter is not yet compatible with the format of the new version of xlsx)

Note: When xlrd operates excel, the row number and column number are calculated from 0

package code

"""
import xlrd
from xlutils import copy
from collections import namedtuple


class ParseExcel(object):
    # xlrd parses excel, row and column numbers start from 0
    def __init__(self, filename, sheet):
        try:
            self.filename = filename
            self.sheet = sheet
            self.wb = xlrd.open_workbook(self.filename, formatting_info=True)
            if isinstance(sheet, str):
                self.sheet = self.wb.sheet_by_name(sheet)
            elif isinstance(sheet, int):
                self.sheet = self.wb.sheet_by_index(sheet)
            else:
                raise TypeError('sheet must be int or str')
        except Exception as e:
            raise e

    def get_max_row(self):
        """Get the maximum line number of a form"""
        max_row_num = self.sheet.nrows
        return max_row_num

    def get_max_column(self):
        """Get the maximum column number of a form"""
        min_row_num = self.sheet.ncols
        return min_row_num

    def get_cell_value(self, row, column):
        """Get data from a cell"""
        if isinstance(row, int) and isinstance(column, int):
            values = self.sheet.cell(row-1, column-1).value
            return values
        else:
            raise TypeError('row and column must be type int')

    def get_row_values(self, row):
        """Get data for a row"""
        if isinstance(row, int):
            values = self.sheet.row_values(row-1)
            return values
        else:
            raise TypeError('row must be type int')

    def get_column_values(self, column):
        """Get data from a column"""

        if isinstance(column, int):
            values = self.sheet.col_values(column-1)
            return values
        else:
            raise TypeError('column must be type int')

    def get_table_title(self):
        """get header"""
        table_title = self.get_row_values(1)
        return table_title

    def get_all_values_dict(self):
        """Get all data, excluding headers, and return a list of nested dictionaries"""
        max_row = self.get_max_row()
        table_title = self.get_table_title()
        value_list = []
        for row in range(2, max_row):
            values = self.get_row_values(row)
            value_list.append(dict(zip(table_title, values)))
        return value_list

    def get_all_values_nametuple(self):
        """Get all data, excluding headers, and return a list of nested named tuples"""
        table_title = self.get_table_title()
        max_row = self.get_max_row()
        excel = namedtuple('excel', table_title)
        value_list = []
        for row in range(2, max_row):
            values = self.get_row_values(row)
            e = excel(*values)
            value_list.append(e)
        return value_list

    def write_value(self, sheet_index, row, column, value):
        """write data to a cell"""
        if isinstance(row, int) and isinstance(column, int):
            if isinstance(sheet_index, int):
                wb = copy.copy(self.wb)
                worksheet = wb.get_sheet(sheet_index)
                worksheet.write(row-1, column-1, value)
                wb.save(self.filename)
            else:
                raise TypeError('{} must be int'.format(sheet_index))
        else:
            raise TypeError('{} and {} must be int'.format(row, column))

if __name__ == '__main__':
    pe = ParseExcel('testdata.xls', 'testcase')
    print('maximum line number:', pe.get_max_row())
    print('maximum column number:', pe.get_max_column())
    print('2nd row 3rd column data:', pe.get_cell_value(2, 3))
    print('row 2 data', pe.get_row_values(2))
    print('Column 3 data', pe.get_column_values(3))
    print('header:', pe.get_table_title())
    print('All data returns a list of nested dictionaries:', pe.get_all_values_dict())
    print('All data returns a list of nested named tuples:', pe.get_all_values_nametuple())
   pe.write_value(0, 1, 3, 'test')

pandas
pandas is a library for data analysis. I always feel that using pandas to parse excel files to read data in automated tests is a bit overkill. Anyway, let’s write pandas to parse excel files.

I only encapsulated reading here. I have a little problem with writing. I will change it and add the code later.

Please pip install pandas to install pandas first

package code

import pandas as pd


class ParseExcel(object):
    def __init__(self, filename, sheet_name=None):
        try:
            self.filename = filename
            self.sheet_name = sheet_name
            self.df = pd.read_excel(self.filename, self.sheet_name)
        except Exception as e:
            raise e

    def get_row_num(self):
        """Get a list of line numbers, starting from 0"""
        row_num_list = self.df.index.values
        return row_num_list

    def get_cell_value(self, row, column):
        """Get data from a cell"""
        try:
            if isinstance(row, int) and isinstance(column, int):
                cell_value = self.df.ix[row-2, column-1] # The row parameter of ix is ​​according to the valid data row, and starts from 0
                return cell_value
            else:
                raise TypeError('row and column must be type int')
        except Exception as e:
            raise e

    def get_table_title(self):
        """get header, return list"""
        table_title = self.df.columns.values
        return table_title

    def get_row_value(self, row):
        """Get the data of a row, the row number starts from 1"""
        try:
            if isinstance(row, int):
                row_data = self.df.ix[row-2].values
                return row_data
            else:
                raise TypeError('row must be type int')
        except Exception as e:
            raise e

    def get_column_value(self, col_name):
        """Get a column of data"""
        try:
            if isinstance(col_name, str):
                col_data = self.df[col_name].values
                return col_data
            else:
                raise TypeError('col_name must be type str')
        except Exception as e:
            raise e
        
    def get_all_value(self):
        """Get all data, excluding headers, Returns a list of nested dictionaries"""
        rows_num = self.get_row_num()
        table_title = self.get_table_title()
        values_list = []
        for i in rows_num:
            row_data = self.df.ix[i, table_title].to_dict()
            values_list.append(row_data)
        return values_list


if __name__ == '__main__':
    pe = ParseExcel('testdata.xlsx', 'testcase')
    print(pe.get_row_num())
    print(pe.get_table_title())
    print(pe.get_all_value())
    print(pe.get_row_value(2))
    print(pe.get_cell_value(2, 3))
    print(pe.get_column_value('Tc_title'))

Summarize

I used 3 methods, 4 libraries xlrd, openpyxl, xlwt, pandas to operate excel files. Personally, I feel that using openpyxl is more suitable for use in automation. Of course, different people have different choices, and the difference between which one is used is not very big.

The above 3 methods can be used directly, no need to encapsulate!

Friends who are interested in test development, automated testing technology and ideas can join us at 642830685. group. Get the latest software testing factory interview materials and Python automation, interface, framework construction learning materials for free! There are peers to exchange and learn together, and technical experts answer questions

Tags: Python Excel software testing Testing Software automation

Posted by raven_web on Wed, 25 May 2022 15:58:56 +0300