

Always code as if the guy who ends up maintaining your code will be a violent psychopath who knows where you live.
— Martin Golding
Readability counts.
Indeed, the ratio of time spent reading versus writing is well over 10 to 1. We are constantly reading old code as part of the effort to write new code. …Therefore, making it easy to read makes it easier to write.
— Robert C. Martin, Clean Code: A Handbook of Agile Software Craftsmanship



assert statement
pytest uses the built-in assert for universal testing of native Python objects. For DataFrame comparisons, use assert_frame_equal in pandas or assert_approx_df_equality in chispa (for Spark).

from typing import Sequence

def to_snake_case(words: Sequence[str]) -> str:
    return '_'.join(words)

def test_to_snake_case():
    test_input = ['talk', 'to', 'the', 'hand']
    result = to_snake_case(test_input)
    assert result == 'talk_to_the_hand'
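When the objects under test are DataFrames, a plain assert gives poor failure messages, so pandas' assert_frame_equal is the better tool. A minimal sketch, with made-up data purely for illustration:

import pandas as pd
from pandas.testing import assert_frame_equal

def test_double_prices():
    # Illustrative transformation: doubling a made-up prices column.
    input_df = pd.DataFrame({'prices': [2.0, 3.5]})
    expected = pd.DataFrame({'prices': [4.0, 7.0]})
    result = input_df * 2
    assert_frame_equal(result, expected)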
“Never allow the same bug to bite you twice.”
— Steve Maguire
Devs watching QA test the product pic.twitter.com/uuLTButB3x
— sanja zakovska 🌱 (@sanjazakovska) January 22, 2021
Numbers — try:
Data types — try:
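As a purely hypothetical sketch (neither the function nor the chosen values come from the original), a single test can sweep several numeric edge cases at once:

def mean(values):
    """Hypothetical function under test."""
    return sum(values) / len(values)

def test_mean_handles_edge_case_numbers():
    # Illustrative values only: zeros, negatives, floats, large numbers.
    assert mean([0, 0, 0]) == 0
    assert mean([-2, 2]) == 0
    assert mean([1.5, 2.5]) == 2.0
    assert mean([10**12, 10**12]) == 10**12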
@pytest.fixture
def input_df_pandas():
    """Return simple pandas input df for index method tests."""
    return create_dataframe([
        ('prices', 'base_prices', 'quantity', 'base_quantity'),
        (2.46, 2.46, 17.0, 16.6),
        (7.32, 7.2, 5.3, 5.4),
        (1.13, 1.1, 2.1, 2.1),
        (12.39, 11.2, 12.9, 13.3),
        (6.63, 6.8, 7.2, 7.4),
    ])
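create_dataframe is not a pandas built-in; it appears to be a small helper from the author's test utilities. Assuming it treats the first tuple as the column names, it might look roughly like this:

import pandas as pd

def create_dataframe(rows):
    """Build a DataFrame, treating the first tuple as column names (assumed behaviour)."""
    return pd.DataFrame(list(rows[1:]), columns=list(rows[0]))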
@pytest.fixture
def my_fixture():
    return value
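A test receives the fixture's return value simply by naming it as a parameter (the names here are placeholders):

def test_using_my_fixture(my_fixture):
    # pytest calls my_fixture() and injects whatever it returns.
    assert my_fixture is not None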
Fixtures that are shared across multiple test modules can be defined once in conftest.py:

import os

import pytest
from pyspark.sql import SparkSession

@pytest.fixture(scope="session")
def spark_session():
    """Set up spark session fixture."""
    print('Setting up test spark session')
    os.environ['PYSPARK_PYTHON'] = '/usr/local/bin/python3'
    suppress_py4j_logging()
    return (
        SparkSession
        .builder
        .master("local[2]")
        .appName("cprices_test_context")
        .config("spark.sql.shuffle.partitions", 1)
        # .config("spark.jars", jar_path)
        # This stops progress bars appearing in the console whilst running
        .config('spark.ui.showConsoleProgress', 'false')
        .getOrCreate()
    )
@pytest.fixture
def to_spark(spark_session):
    """Convert pandas df to spark."""
    def _(df: pd.DataFrame):
        return spark_session.createDataFrame(df)
    return _
Combining this with the input_df_pandas fixture from before:

@pytest.fixture
def input_df(to_spark, input_df_pandas):
    """Return simple spark input df for index method tests."""
    return to_spark(input_df_pandas)
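To compare Spark DataFrames, chispa (mentioned earlier) offers approximate equality checks. A minimal sketch, assuming the assert_approx_df_equality(df1, df2, precision) form and reusing the to_spark fixture with made-up data:

import pandas as pd
from chispa import assert_approx_df_equality

def test_spark_frames_are_approximately_equal(to_spark):
    # Made-up frames whose values differ only within the chosen precision.
    result = to_spark(pd.DataFrame({'prices': [2.46, 7.32]}))
    expected = to_spark(pd.DataFrame({'prices': [2.461, 7.319]}))
    assert_approx_df_equality(result, expected, 0.01)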
When grouping tests in a class, remember to add self to the test method parameters:

class TestMyFunc:
    """Group of tests for my_func."""

    @pytest.mark.skip(reason="test shell")
    def test_my_func(self):
        """Test for my_func."""
        pass
Mark your test shells as skip so that you can see where you have missing tests at a glance.
@pytest.mark.parametrize(
    'digits,expected',
    [(3, 5.786), (1, 5.8), (0, 6), (8, 5.78646523)]
)
def test_round(digits, expected):
    assert round(5.78646523, digits) == expected
Check out mitches-got-glitches/testing-tips for more info and examples.
@parametrize_cases(
    Case(
        label="carli_fixed_base",
        index_method='carli',
        base_price_method='fixed_base',
        expout='large_output_carli_fixed_base.csv',
    ),
    Case(
        label="dutot_fixed_base",
        index_method='dutot',
        base_price_method='fixed_base',
        expout='large_output_dutot_fixed_base.csv',
    ),
    Case(
        label="jevons_fixed_base",
        index_method='jevons',
        base_price_method='fixed_base',
        expout='large_output_jevons_fixed_base.csv',
    ),
    Case(
        label="laspeyres_fixed_base",
        index_method='laspeyres',
        base_price_method='fixed_base',
        expout='large_output_laspeyres_fixed_base.csv',
    ),
    Case(
        label="paasche_fixed_base",
        index_method='paasche',
        base_price_method='fixed_base',
        expout='large_output_paasche_fixed_base.csv',
    ),
    Case(
        label="fisher_fixed_base",
        index_method='fisher',
        base_price_method='fixed_base',
        expout='large_output_fisher_fixed_base.csv',
    ),
    Case(
        label="tornqvist_fixed_base",
        index_method='tornqvist',
        base_price_method='fixed_base',
        expout='large_output_tornqvist_fixed_base.csv',
    ),
    Case(
        label="carli_chained",
        index_method='carli',
        base_price_method='chained',
        expout='large_output_carli_chained.csv',
    ),
    Case(
        label="dutot_chained",
        index_method='dutot',
        base_price_method='chained',
        expout='large_output_dutot_chained.csv',
    ),
    Case(
        label="jevons_chained",
        index_method='jevons',
        base_price_method='chained',
        expout='large_output_jevons_chained.csv',
    ),
    Case(
        label="jevons_bilateral",
        index_method='jevons',
        base_price_method='bilateral',
        expout='large_output_jevons_bilateral.csv',
    ),
    Case(
        label="laspeyres_bilateral",
        index_method='laspeyres',
        base_price_method='bilateral',
        expout='large_output_laspeyres_bilateral.csv',
    ),
    Case(
        label="paasche_bilateral",
        index_method='paasche',
        base_price_method='bilateral',
        expout='large_output_paasche_bilateral.csv',
    ),
    Case(
        label="fisher_bilateral",
        index_method='fisher',
        base_price_method='bilateral',
        expout='large_output_fisher_bilateral.csv',
    ),
    Case(
        label="tornqvist_bilateral",
        index_method='tornqvist',
        base_price_method='bilateral',
        expout='large_output_tornqvist_bilateral.csv',
    ),
    Case(
        label="jevons_fixed_base_with_rebase",
        index_method='jevons',
        base_price_method='fixed_base_with_rebase',
        expout='large_output_jevons_rebased_unchained.csv',
    ),
    Case(
        label="tornqvist_fixed_base_with_rebase",
        index_method='tornqvist',
        base_price_method='fixed_base_with_rebase',
        expout='large_output_tornqvist_rebased_unchained.csv',
    ),
)
def test_index_scenarios(
    input_data_large,
    index_method,
    base_price_method,
    expout,
    filename_to_pandas,
):
    """Test for all different combinations of index method."""
    expected_output = filename_to_pandas(expout)
    actual_output = calculate_index(
        input_data_large,
        date_col='month',
        levels=['group', 'id'],
        base_price_method=base_price_method,
        index_method=index_method,
    )
    assert_frame_equal(actual_output.reset_index(), expected_output)
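parametrize_cases and Case are not pytest built-ins; they come from the author's helpers (see the testing-tips repo above). As a rough sketch of the idea only, and not the author's actual implementation, they can be built on top of pytest.param:

import pytest

class Case:
    """A labelled bundle of keyword arguments for one test case."""
    def __init__(self, label, **kwargs):
        self.label = label
        self.kwargs = kwargs

def parametrize_cases(*cases):
    """Turn Case objects into a single pytest.mark.parametrize decorator."""
    argnames = sorted({name for case in cases for name in case.kwargs})
    argvalues = [
        pytest.param(
            *(case.kwargs.get(name) for name in argnames),
            id=case.label,
        )
        for case in cases
    ]
    return pytest.mark.parametrize(argnames, argvalues)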

Add these to your .bashrc to search your shell history with the up and down arrow keys, matching whatever you have typed so far:
bind '"\e[A": history-search-backward'
bind '"\e[B": history-search-forward'
bind '"\eOA": history-search-backward'
bind '"\eOB": history-search-forward'
Quality Assurance of Code for Analysis and Research
— Best Practice and Impact team