3.3. Read Excel
File paths works also with URLs
3.3.1. SetUp
>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
3.3.2. Parameters
iosheet_nameheaderindex_col
3.3.3. Use Case - 1
>>> import pandas as pd
>>>
>>>
>>> DATA = 'https://python3.info/_static/astro-trl.xlsx'
>>>
>>> df = pd.read_excel(
... io=DATA,
... sheet_name='Polish',
... header=1,
... index_col=0)
3.3.4. Use Case - 2
>>> import pandas as pd
>>>
>>>
>>> DATA = 'https://python3.info/_static/aatc-mission-exp12.xlsx'
>>>
>>> df = pd.read_excel(
... io=DATA,
... sheet_name='Luminance',
... header=1,
... parse_dates=['datetime', 'date', 'time'],
... index_col='datetime')
3.3.5. Assignments
# %% About
# - Name: Pandas ReadExcel Data
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read data `DATA` in Excel format to Pandas DataFrame
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj dane `DATA` w formacie Excel do Pandas DataFrame
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# users Unnamed: 1 Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 Unnamed: 6
# 0 firstname lastname age email lastlogin is_active groups
# 1 Alice Apricot 30 alice@example.com 2000-01-01 True users;staff
# 2 Bob Blackthorn 31 bob@example.com 2000-01-02 True users;staff
# 3 Carol Corn 32 carol@example.com 2000-01-03 True users
# 4 Dave Durian 33 dave@example.org 2000-01-04 True users
# 5 Eve Elderberry 34 eve@example.org 2000-01-05 True users;staff;admins
# 6 Mallory Melon 15 mallory@example.net NaN False NaN
# %% Hints
# - `DataFrame.read_excel()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an is invalid version; expected: `3.9` or newer.'
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
users Unnamed: 1 Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 Unnamed: 6
0 firstname lastname age email lastlogin is_active groups
1 Alice Apricot 30 alice@example.com 2000-01-01 True users;staff
2 Bob Blackthorn 31 bob@example.com 2000-01-02 True users;staff
3 Carol Corn 32 carol@example.com 2000-01-03 True users
4 Dave Durian 33 dave@example.org 2000-01-04 True users
5 Eve Elderberry 34 eve@example.org 2000-01-05 True users;staff;admins
6 Mallory Melon 15 mallory@example.net NaN False NaN
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
result: pd.DataFrame
# %% Data
DATA = 'https://python3.info/_static/example.xlsx'
# %% Result
result = ...
# %% About
# - Name: Pandas ReadExcel Sheet Name
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read data `DATA` in Excel format to Pandas DataFrame
# 2. Read the sheet named `users`
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj dane `DATA` w formacie Excel do Pandas DataFrame
# 2. Wczytaj arkusz o nazwie `users`
# 3. Zdefiniuj zmienną `result` z rozwiązaniem
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# users Unnamed: 1 Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 Unnamed: 6
# 0 firstname lastname age email lastlogin is_active groups
# 1 Alice Apricot 30 alice@example.com 2000-01-01 True users;staff
# 2 Bob Blackthorn 31 bob@example.com 2000-01-02 True users;staff
# 3 Carol Corn 32 carol@example.com 2000-01-03 True users
# 4 Dave Durian 33 dave@example.org 2000-01-04 True users
# 5 Eve Elderberry 34 eve@example.org 2000-01-05 True users;staff;admins
# 6 Mallory Melon 15 mallory@example.net NaN False NaN
# %% Hints
# - `DataFrame.read_excel(sheet_name=...)`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an is invalid version; expected: `3.9` or newer.'
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
users Unnamed: 1 Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5 Unnamed: 6
0 firstname lastname age email lastlogin is_active groups
1 Alice Apricot 30 alice@example.com 2000-01-01 True users;staff
2 Bob Blackthorn 31 bob@example.com 2000-01-02 True users;staff
3 Carol Corn 32 carol@example.com 2000-01-03 True users
4 Dave Durian 33 dave@example.org 2000-01-04 True users
5 Eve Elderberry 34 eve@example.org 2000-01-05 True users;staff;admins
6 Mallory Melon 15 mallory@example.net NaN False NaN
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
result: pd.DataFrame
# %% Data
DATA = 'https://python3.info/_static/example.xlsx'
# %% Result
result = ...
# %% About
# - Name: Pandas ReadExcel Complex
# - Difficulty: medium
# - Lines: 1
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read data `DATA` in Excel format to Pandas DataFrame
# 2. Read the sheet named `users`
# 3. Use `NAMES` as column names
# 4. Read only columns: 'firstname', 'lastname', 'age', 'email'
# 5. Set `email` as index column
# 6. Skip first row
# 7. Define variable `result` with the solution
# 8. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj dane `DATA` w formacie Excel do Pandas DataFrame
# 2. Wczytaj arkusz o nazwie `users`
# 3. Użyj `NAMES` jako nazw kolumn
# 4. Wczytaj tylko kolumny: 'firstname', 'lastname', 'age', 'email'
# 5. Ustaw `email` jako kolumnę indeksu
# 6. Pomiń pierwszy wiersz
# 7. Zdefiniuj zmienną `result` z rozwiązaniem
# 8. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# firstname lastname age
# email
# alice@example.com Alice Apricot 30
# bob@example.com Bob Blackthorn 31
# carol@example.com Carol Corn 32
# dave@example.org Dave Durian 33
# eve@example.org Eve Elderberry 34
# mallory@example.net Mallory Melon 15
# %% Hints
# - `DataFrame.read_csv()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an is invalid version; expected: `3.9` or newer.'
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` has an invalid type; expected: `pd.DataFrame`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> result # doctest: +NORMALIZE_WHITESPACE
firstname lastname age
email
alice@example.com Alice Apricot 30
bob@example.com Bob Blackthorn 31
carol@example.com Carol Corn 32
dave@example.org Dave Durian 33
eve@example.org Eve Elderberry 34
mallory@example.net Mallory Melon 15
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
result: pd.DataFrame
# %% Data
DATA = 'https://python3.info/_static/example.xlsx'
NAMES = [
'firstname',
'lastname',
'age',
'email',
'lastlogin',
'is_active',
'groups',
]
# %% Result
result = ...