Always code as if the guy who ends up maintaining your code will be a violent psychopath who knows where you live.
— Martin Golding
Readability counts.
Indeed, the ratio of time spent reading versus writing is well over 10 to 1. We are constantly reading old code as part of the effort to write new code. …Therefore, making it easy to read makes it easier to write.
— Robert C. Martin, Clean Code: A Handbook of Agile Software Craftsmanship
Bad example
Good example
assert
statement
pytest
uses the built-in assert
for universal testing of native Python objects
assert_frame_equal
in pandas
assert_approx_df_equality
in chispa (for Spark)
def to_snake_case(words: Sequence[str]) -> str:
    """Concatenate *words* into a single snake_case string."""
    separator = '_'
    return separator.join(words)
def test_to_snake_case():
    """to_snake_case should join the given words with underscores."""
    words = ['talk', 'to', 'the', 'hand']
    assert to_snake_case(words) == 'talk_to_the_hand'
“Never allow the same bug to bite you twice.”
— Steve Maguire
Devs watching QA test the product pic.twitter.com/uuLTButB3x
— sanja zakovska 🌱 (@sanjazakovska) January 22, 2021
Numbers — try:
Data types — try:
@pytest.fixture
def input_df_pandas():
    """Simple pandas input df for index method tests."""
    header = ('prices', 'base_prices', 'quantity', 'base_quantity')
    rows = [
        (2.46, 2.46, 17.0, 16.6),
        (7.32, 7.2, 5.3, 5.4),
        (1.13, 1.1, 2.1, 2.1),
        (12.39, 11.2, 12.9, 13.3),
        (6.63, 6.8, 7.2, 7.4),
    ]
    return create_dataframe([header, *rows])
@pytest.fixture
def my_fixture():
    """Template fixture: replace with the object your tests need."""
    # NOTE(review): `value` is undefined here — this is an illustrative
    # shell; substitute the real test input when copying this pattern.
    return value
Place shared fixtures in conftest.py:
@pytest.fixture(scope="session")
def spark_session():
    """Provide a local spark session shared across the test session."""
    print('Setting up test spark session')
    os.environ['PYSPARK_PYTHON'] = '/usr/local/bin/python3'
    suppress_py4j_logging()
    builder = (
        SparkSession.builder
        .master("local[2]")
        .appName("cprices_test_context")
        .config("spark.sql.shuffle.partitions", 1)
        # Stop progress bars appearing in the console while tests run.
        .config('spark.ui.showConsoleProgress', 'false')
    )
    return builder.getOrCreate()
@pytest.fixture
def to_spark(spark_session):
    """Provide a callable that converts a pandas df to spark."""
    def convert(df: pd.DataFrame):
        return spark_session.createDataFrame(df)

    return convert
input_df_pandas from before:
@pytest.fixture
def input_df(to_spark, input_df_pandas):
    """Simple spark input df for index method tests."""
    spark_df = to_spark(input_df_pandas)
    return spark_df
Add self to parameters:
class TestMyFunc:
    """Tests for my_func, grouped in one class."""

    @pytest.mark.skip(reason="test shell")
    def test_my_func(self):
        """Placeholder test for my_func; implementation pending."""
Mark your test shells as skip so that you can see where you have missing tests at a glance.
# BUG FIX: the original list was `[(3, 5.786) (1, 5.8), ...]` — the
# missing comma makes Python *call* the first tuple with the second,
# raising TypeError at collection time. Comma restored below.
@pytest.mark.parametrize(
    'digits,expected',
    [(3, 5.786), (1, 5.8), (0, 6), (8, 5.78646523)],
)
def test_round(digits, expected):
    """round() keeps the requested number of digits of 5.78646523."""
    assert round(5.78646523, digits) == expected
Check out mitches-got-glitches/testing-tips for more info and examples.
# Each Case pairs an index method with a base-price method and names the
# CSV file holding the expected output for that combination.
@parametrize_cases(
    # --- fixed_base: every index method ---
    Case(
        label="carli_fixed_base",
        index_method='carli',
        base_price_method='fixed_base',
        expout='large_output_carli_fixed_base.csv',
    ),
    Case(
        label="dutot_fixed_base",
        index_method='dutot',
        base_price_method='fixed_base',
        expout='large_output_dutot_fixed_base.csv',
    ),
    Case(
        label="jevons_fixed_base",
        index_method='jevons',
        base_price_method='fixed_base',
        expout='large_output_jevons_fixed_base.csv',
    ),
    Case(
        label="laspeyres_fixed_base",
        index_method='laspeyres',
        base_price_method='fixed_base',
        expout='large_output_laspeyres_fixed_base.csv',
    ),
    Case(
        label="paasche_fixed_base",
        index_method='paasche',
        base_price_method='fixed_base',
        expout='large_output_paasche_fixed_base.csv',
    ),
    Case(
        label="fisher_fixed_base",
        index_method='fisher',
        base_price_method='fixed_base',
        expout='large_output_fisher_fixed_base.csv',
    ),
    Case(
        label="tornqvist_fixed_base",
        index_method='tornqvist',
        base_price_method='fixed_base',
        expout='large_output_tornqvist_fixed_base.csv',
    ),
    # --- chained: unweighted index methods only ---
    Case(
        label="carli_chained",
        index_method='carli',
        base_price_method='chained',
        expout='large_output_carli_chained.csv',
    ),
    Case(
        label="dutot_chained",
        index_method='dutot',
        base_price_method='chained',
        expout='large_output_dutot_chained.csv',
    ),
    Case(
        label="jevons_chained",
        index_method='jevons',
        base_price_method='chained',
        expout='large_output_jevons_chained.csv',
    ),
    # --- bilateral ---
    Case(
        label="jevons_bilateral",
        index_method='jevons',
        base_price_method='bilateral',
        expout='large_output_jevons_bilateral.csv',
    ),
    Case(
        label="laspeyres_bilateral",
        index_method='laspeyres',
        base_price_method='bilateral',
        expout='large_output_laspeyres_bilateral.csv',
    ),
    Case(
        label="paasche_bilateral",
        index_method='paasche',
        base_price_method='bilateral',
        expout='large_output_paasche_bilateral.csv',
    ),
    Case(
        label="fisher_bilateral",
        index_method='fisher',
        base_price_method='bilateral',
        expout='large_output_fisher_bilateral.csv',
    ),
    Case(
        label="tornqvist_bilateral",
        index_method='tornqvist',
        base_price_method='bilateral',
        expout='large_output_tornqvist_bilateral.csv',
    ),
    # --- fixed base with rebase (expected outputs are unchained) ---
    Case(
        label="jevons_fixed_base_with_rebase",
        index_method='jevons',
        base_price_method='fixed_base_with_rebase',
        expout='large_output_jevons_rebased_unchained.csv',
    ),
    Case(
        label="tornqvist_fixed_base_with_rebase",
        index_method='tornqvist',
        base_price_method='fixed_base_with_rebase',
        expout='large_output_tornqvist_rebased_unchained.csv',
    ),
)
def test_index_scenarios(
    input_data_large,
    index_method,
    base_price_method,
    expout,
    filename_to_pandas,
):
    """Test for all different combinations of index method."""
    # Expected result is loaded from the CSV named by the current case.
    expected_output = filename_to_pandas(expout)
    actual_output = calculate_index(
        input_data_large,
        date_col='month',
        levels=['group', 'id'],
        base_price_method=base_price_method,
        index_method=index_method,
    )
    # reset_index so both frames compare on a plain positional index.
    assert_frame_equal(actual_output.reset_index(), expected_output)
Add these to your .bashrc
# Make Up/Down arrows search shell history for commands starting with the
# text already typed. Both ANSI (\e[) and application-keypad (\eO) escape
# sequences are bound so this works across terminal modes.
bind '"\e[A": history-search-backward'
bind '"\e[B": history-search-forward'
bind '"\eOA": history-search-backward'
bind '"\eOB": history-search-forward'
Quality Assurance of Code for Analysis and Research
— Best Practice and Impact team