foreword
In automated testing, we store data nothing more than using files or databases, so the files can be csv, xlsx, xml, or even txt files. Usually, excel files are often our first choice. Whether it is writing test cases or storing test data, excel is Very convenient. So today, we will summarize the methods of processing excel files in different modules, and directly encapsulate them, so that we can use them directly in the future and increase work efficiency.
openpyxl
openpyxl is a third-party library. First, we use the command pip install openpyxl to install it directly
Note: When openpyxl operates excel, the row number and column number are calculated from 1
package code
from openpyxl import load_workbook from openpyxl.styles import Font from openpyxl.styles.colors import BLACK from collections import namedtuple class ParseExcel(object): """Parse excel document""" def __init__(self, filename, sheet_name=None): try: self.filename = filename self.sheet_name = sheet_name self.wb = load_workbook(self.filename) if self.sheet_name is None: self.work_sheet = self.wb.active else: self.work_sheet = self.wb[self.sheet_name] except FileNotFoundError as e: raise e def get_max_row_num(self): """get max line number""" max_row_num = self.work_sheet.max_row return max_row_num def get_max_column_num(self): """get maximum column number""" max_column = self.work_sheet.max_column return max_column def get_cell_value(self, coordinate=None, row=None, column=None): """Get the data of the specified cell""" if coordinate is not None: try: return self.work_sheet[coordinate].value except Exception as e: raise e elif coordinate is None and row is not None and column is not None: if isinstance(row, int) and isinstance(column, int): return self.work_sheet.cell(row=row, column=column).value else: raise TypeError('row and column must be type int') else: raise Exception("Insufficient Coordinate of cell!") def get_row_value(self, row): """Get data for a row""" column_num = self.get_max_column_num() row_value = [] if isinstance(row, int): for column in range(1, column_num + 1): values_row = self.work_sheet.cell(row, column).value row_value.append(values_row) return row_value else: raise TypeError('row must be type int') def get_column_value(self, column): """Get a column of data""" row_num = self.get_max_column_num() column_value = [] if isinstance(column, int): for row in range(1, row_num + 1): values_column = self.work_sheet.cell(row, column).value column_value.append(values_column) return column_value else: raise TypeError('column must be type int') def get_all_value_1(self): """Get all data of the specified form(remove header)""" max_row_num = self.get_max_row_num() max_column = self.get_max_column_num() values = [] for row in range(2, max_row_num + 1): value_list = [] for column in range(1, max_column + 1): value = self.work_sheet.cell(row, column).value value_list.append(value) values.append(value_list) return values def get_all_value_2(self): """Get all data of the specified form(remove header)""" rows_obj = self.work_sheet.iter_rows(min_row=2, max_row=self.work_sheet.max_row, values_only=True) # Specifying values_only will directly extract data without using cell().value values = [] for row_tuple in rows_obj: value_list = [] for value in row_tuple: value_list.append(value) values.append(value_list) return values def get_excel_title(self): """Obtain sheet header""" title_key = tuple(self.work_sheet.iter_rows(max_row=1, values_only=True))[0] return title_key def get_listdict_all_value(self): """Get all data, return a list of nested dictionaries""" sheet_title = self.get_excel_title() all_values = self.get_all_value_2() value_list = [] for value in all_values: value_list.append(dict(zip(sheet_title, value))) return value_list def get_list_nametuple_all_value(self): """Get all data, return a list of nested named tuples""" sheet_title = self.get_excel_title() values = self.get_all_value_2() excel = namedtuple('excel', sheet_title) value_list = [] for value in values: e = excel(*value) value_list.append(e) return value_list def write_cell(self, row, column, value=None, bold=True, color=BLACK): """ Write data to the specified cell :param work_sheet: :param row: line number :param column: column number :param value: data to be written :param bold: bold, default bold :param color: font color, default black :return: """ try: if isinstance(row, int) and isinstance(column, int): cell_obj = self.work_sheet.cell(row, column) cell_obj.font = Font(color=color, bold=bold) cell_obj.value = value self.wb.save(self.filename) else: raise TypeError('row and column must be type int') except Exception as e: raise e if __name__ == '__main__': pe = ParseExcel('testdata.xlsx') # sheet = pe.get_sheet_object('testcase') column_row = pe.get_max_column_num() print('maximum column number:', column_row) max_row = pe.get_max_row_num() print('maximum line number:', max_row) # cell_value_1 = pe.get_cell_value(row=2, column=3) print('the first%d OK, the first%d The column data is: %s' % (2, 3, cell_value_1)) cell_value_2 = pe.get_cell_value(coordinate='A5') print('A5 The cell data is: {}'.format(cell_value_2)) value_row = pe.get_row_value(3) print('the first{}The row data is:{}'.format(3, value_row)) value_column = pe.get_column_value(2) print('the first{}The column data is:{}'.format(2, value_column)) # values_1 = pe.get_all_value_1() print('The first way to get all the data\n', values_1) values_2 = pe.get_all_value_2() print('The second way to get all the data\n', values_2) title = pe.get_excel_title() print('The header is\n{}'.format(title)) dict_value = pe.get_listdict_all_value() print('a list of nested dictionaries of all data:\n', dict_value) # namedtuple_value = pe.get_list_nametuple_all_value() print('a list of nested named tuples of all data:\n', namedtuple_value) pe.write_cell(1, 2, 'Tc_title')
xlrd
Install xlrd, this module only supports read operations. If you want to write, you need to use xlwt or use xlutils to cooperate with xlrd, but using xlwt can only write new excel files, not the original files, so the choice here is to use xlutils
But there is still a problem, if we use xlutils, then our excel file needs to be suffixed with .xls. Because it cannot be written with xlsx as the suffix, an error will be reported (pro-test, because the formatting_info parameter is not yet compatible with the format of the new version of xlsx)
Note: When xlrd operates excel, the row number and column number are calculated from 0
package code
""" import xlrd from xlutils import copy from collections import namedtuple class ParseExcel(object): # xlrd parses excel, row and column numbers start from 0 def __init__(self, filename, sheet): try: self.filename = filename self.sheet = sheet self.wb = xlrd.open_workbook(self.filename, formatting_info=True) if isinstance(sheet, str): self.sheet = self.wb.sheet_by_name(sheet) elif isinstance(sheet, int): self.sheet = self.wb.sheet_by_index(sheet) else: raise TypeError('sheet must be int or str') except Exception as e: raise e def get_max_row(self): """Get the maximum line number of a form""" max_row_num = self.sheet.nrows return max_row_num def get_max_column(self): """Get the maximum column number of a form""" min_row_num = self.sheet.ncols return min_row_num def get_cell_value(self, row, column): """Get data from a cell""" if isinstance(row, int) and isinstance(column, int): values = self.sheet.cell(row-1, column-1).value return values else: raise TypeError('row and column must be type int') def get_row_values(self, row): """Get data for a row""" if isinstance(row, int): values = self.sheet.row_values(row-1) return values else: raise TypeError('row must be type int') def get_column_values(self, column): """Get data from a column""" if isinstance(column, int): values = self.sheet.col_values(column-1) return values else: raise TypeError('column must be type int') def get_table_title(self): """get header""" table_title = self.get_row_values(1) return table_title def get_all_values_dict(self): """Get all data, excluding headers, and return a list of nested dictionaries""" max_row = self.get_max_row() table_title = self.get_table_title() value_list = [] for row in range(2, max_row): values = self.get_row_values(row) value_list.append(dict(zip(table_title, values))) return value_list def get_all_values_nametuple(self): """Get all data, excluding headers, and return a list of nested named tuples""" table_title = self.get_table_title() max_row = self.get_max_row() excel = namedtuple('excel', table_title) value_list = [] for row in range(2, max_row): values = self.get_row_values(row) e = excel(*values) value_list.append(e) return value_list def write_value(self, sheet_index, row, column, value): """write data to a cell""" if isinstance(row, int) and isinstance(column, int): if isinstance(sheet_index, int): wb = copy.copy(self.wb) worksheet = wb.get_sheet(sheet_index) worksheet.write(row-1, column-1, value) wb.save(self.filename) else: raise TypeError('{} must be int'.format(sheet_index)) else: raise TypeError('{} and {} must be int'.format(row, column)) if __name__ == '__main__': pe = ParseExcel('testdata.xls', 'testcase') print('maximum line number:', pe.get_max_row()) print('maximum column number:', pe.get_max_column()) print('2nd row 3rd column data:', pe.get_cell_value(2, 3)) print('row 2 data', pe.get_row_values(2)) print('Column 3 data', pe.get_column_values(3)) print('header:', pe.get_table_title()) print('All data returns a list of nested dictionaries:', pe.get_all_values_dict()) print('All data returns a list of nested named tuples:', pe.get_all_values_nametuple()) pe.write_value(0, 1, 3, 'test')
pandas
pandas is a library for data analysis. I always feel that using pandas to parse excel files to read data in automated tests is a bit overkill. Anyway, let’s write pandas to parse excel files.
I only encapsulated reading here. I have a little problem with writing. I will change it and add the code later.
Please pip install pandas to install pandas first
package code
import pandas as pd class ParseExcel(object): def __init__(self, filename, sheet_name=None): try: self.filename = filename self.sheet_name = sheet_name self.df = pd.read_excel(self.filename, self.sheet_name) except Exception as e: raise e def get_row_num(self): """Get a list of line numbers, starting from 0""" row_num_list = self.df.index.values return row_num_list def get_cell_value(self, row, column): """Get data from a cell""" try: if isinstance(row, int) and isinstance(column, int): cell_value = self.df.ix[row-2, column-1] # The row parameter of ix is according to the valid data row, and starts from 0 return cell_value else: raise TypeError('row and column must be type int') except Exception as e: raise e def get_table_title(self): """get header, return list""" table_title = self.df.columns.values return table_title def get_row_value(self, row): """Get the data of a row, the row number starts from 1""" try: if isinstance(row, int): row_data = self.df.ix[row-2].values return row_data else: raise TypeError('row must be type int') except Exception as e: raise e def get_column_value(self, col_name): """Get a column of data""" try: if isinstance(col_name, str): col_data = self.df[col_name].values return col_data else: raise TypeError('col_name must be type str') except Exception as e: raise e def get_all_value(self): """Get all data, excluding headers, Returns a list of nested dictionaries""" rows_num = self.get_row_num() table_title = self.get_table_title() values_list = [] for i in rows_num: row_data = self.df.ix[i, table_title].to_dict() values_list.append(row_data) return values_list if __name__ == '__main__': pe = ParseExcel('testdata.xlsx', 'testcase') print(pe.get_row_num()) print(pe.get_table_title()) print(pe.get_all_value()) print(pe.get_row_value(2)) print(pe.get_cell_value(2, 3)) print(pe.get_column_value('Tc_title'))
Summarize
I used 3 methods, 4 libraries xlrd, openpyxl, xlwt, pandas to operate excel files. Personally, I feel that using openpyxl is more suitable for use in automation. Of course, different people have different choices, and the difference between which one is used is not very big.
The above 3 methods can be used directly, no need to encapsulate!
Friends who are interested in test development, automated testing technology and ideas can join us at 642830685. group. Get the latest software testing factory interview materials and Python automation, interface, framework construction learning materials for free! There are peers to exchange and learn together, and technical experts answer questions