Skip to content

Commit e1f6f45

Browse files
authored
Merge pull request #162 from PythonPredictions/develop
Release v1.1.1: merging 2023-03 development branch to master for 2023-03 release.
2 parents 16342ab + 2dfc309 commit e1f6f45

23 files changed

+973
-397
lines changed

.github/ISSUE_TEMPLATE/issue.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
name: Task
3-
about: A small issue t. It will usually be labeled as `good first issue` or `enhancement`.
3+
about: A small issue. It will usually be labeled as `good first issue` or `enhancement`.
44
---
55

66
<!-- Issue title should mirror the Task Title. -->
@@ -11,4 +11,4 @@ Task: I am an Issue
1111

1212
## Task Description
1313

14-
This issue will...
14+
This issue will...

.github/workflows/development_CI.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# Runs CI when pushing to develop branch
2-
# runs pylint and pytest
1+
## Runs CI when pushing to develop branch
32

43
name: CI_develop_action
54

@@ -26,7 +25,7 @@ jobs:
2625
run: |
2726
python -m pip install --upgrade pip
2827
python -m pip install -r requirements.txt
29-
python -m pip install pylint pytest pytest-mock pytest-cov
28+
python -m pip install -r requirements.dev.txt
3029
3130
- name: Test with pytest
3231
run: |

.github/workflows/master_CI.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# Runs CI when pushing to master branch
2-
# runs pylint and pytest
1+
## Runs CI when pushing to master branch
32

43
name: CI_master_action
54

@@ -26,7 +25,7 @@ jobs:
2625
run: |
2726
python -m pip install --upgrade pip
2827
python -m pip install -r requirements.txt
29-
python -m pip install pylint pytest pytest-mock pytest-cov
28+
python -m pip install -r requirements.dev.txt
3029
3130
- name: Test with pytest
3231
run: |

.github/workflows/master_publish_pypi.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# Publishes code to pip when we publish a new release
2-
# runs pylint and pytest
1+
## Publishes code to pip when we publish a new release
32

43
name: publish_to_pip
54

Makefile

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Makefile with some simple commands to make a developer's life easier
2+
3+
4+
install-requirements: install-build-essential
5+
pip install -r requirements.txt
6+
7+
dev/install-requirements: install-requirements
8+
pip install -r requirements.dev.txt
9+
10+
install-build-essential:
11+
sudo apt-get update
12+
sudo apt-get install build-essential
13+
14+
update-setuptools:
15+
pip install --upgrade setuptools wheel
16+
17+
test-unit:
18+
pytest tests
19+
@echo 'unit tests OK'
20+
21+
lint:
22+
pylint cobra
23+
@echo 'lint OK'
24+
25+
lint-minimal:
26+
pylint E cobra
27+
@echo 'lint minimal OK'
28+
29+
typecheck:
30+
mypy cobra
31+
@echo 'typecheck OK'
32+
33+
codestyle:
34+
pycodestyle cobra
35+
@echo 'codestyle OK'
36+
37+
docstyle:
38+
pydocstyle cobra
39+
@echo 'docstyle OK'
40+
41+
code-qa: typecheck codestyle docstyle lint-minimal

README.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ This package requires only the usual Python libraries for data science, being nu
4242
pip install -r requirements.txt
4343

4444

45-
**Note**: if you want to install Cobra with e.g. pip, you don't have to install all of these requirements as these are automatically installed with Cobra itself.
45+
**Note**: if you want to install Cobra with e.g. pip, you don't have to install all these requirements as these are automatically installed with Cobra itself.
4646

4747
Installation
4848
------------
@@ -61,9 +61,7 @@ Documentation and extra material
6161

6262
- HTML documentation of the `individual modules <https://pythonpredictions.github.io/cobra.io/docstring/modules.html>`_.
6363

64-
- A step-by-step `tutorial <https://pythonpredictions.github.io/cobra/tutorials/tutorial_Cobra_logistic_regression.ipynb>`_ for **logistic regression**.
65-
66-
- A step-by-step `tutorial <https://pythonpredictions.github.io/cobra/tutorials/tutorial_Cobra_linear_regression.ipynb>`__ for **linear regression**.
64+
- Step-by-step `tutorials <https://github.com/PythonPredictions/cobra/blob/master/tutorials>`_ for a logistic and a linear regression use case.
6765

6866
- Check out the Data Science Leuven Meetup `talk <https://www.youtube.com/watch?v=w7ceZZqMEaA&feature=youtu.be>`_ by one of the core developers (second presentation). His `slides <https://github.com/PythonPredictions/Cobra-DS-meetup-Leuven/blob/main/DS_Leuven_meetup_20210209_cobra.pdf>`_ and `related material <https://github.com/PythonPredictions/Cobra-DS-meetup-Leuven>`_ are also available.
6967

cobra/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
1-
from .version import __version__
1+
from .version import __version__
2+
from cobra.utils import log_tutorial
3+
import logging
4+
5+
logging.basicConfig(level=logging.INFO, format="%(message)s")
6+
7+
log_tutorial()

cobra/evaluation/evaluator.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,14 @@ def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
187187
label="ROC curve (area = {s:.3})".format(s=auc))
188188

189189
ax.plot([0, 1], [0, 1], color="darkorange", linewidth=3,
190-
linestyle="--")
191-
ax.set_xlabel("False Positive Rate", fontsize=15)
192-
ax.set_ylabel("True Positive Rate", fontsize=15)
190+
linestyle="--", label="random selection")
191+
ax.set_xlabel("False positive rate", fontsize=15)
192+
ax.set_ylabel("True positive rate", fontsize=15)
193193
ax.legend(loc="lower right")
194194
ax.set_title("ROC curve", fontsize=20)
195195

196+
ax.set_ylim([0, 1])
197+
196198
if path:
197199
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
198200

@@ -224,6 +226,8 @@ def plot_confusion_matrix(self, path: str=None, dim: tuple=(12, 8),
224226
fmt="s", cmap="Blues",
225227
xticklabels=labels, yticklabels=labels)
226228
ax.set_title("Confusion matrix", fontsize=20)
229+
plt.ylabel('True labels', fontsize=15)
230+
plt.xlabel('Predicted labels', fontsize=15)
227231

228232
if path:
229233
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
@@ -256,13 +260,13 @@ def plot_cumulative_response_curve(self, path: str=None, dim: tuple=(12, 8)):
256260

257261
plt.bar(x_labels[::-1], lifts, align="center",
258262
color="cornflowerblue")
259-
plt.ylabel("response (%)", fontsize=16)
260-
plt.xlabel("decile", fontsize=16)
263+
plt.ylabel("Response (%)", fontsize=15)
264+
plt.xlabel("Decile", fontsize=15)
261265
ax.set_xticks(x_labels)
262266
ax.set_xticklabels(x_labels)
263267

264268
plt.axhline(y=inc_rate*100, color="darkorange", linestyle="--",
265-
xmin=0.05, xmax=0.95, linewidth=3, label="Incidence")
269+
xmin=0.05, xmax=0.95, linewidth=3, label="incidence")
266270

267271
# Legend
268272
ax.legend(loc="upper right")
@@ -305,13 +309,13 @@ def plot_lift_curve(self, path: str=None, dim: tuple=(12, 8)):
305309

306310
plt.bar(x_labels[::-1], lifts, align="center",
307311
color="cornflowerblue")
308-
plt.ylabel("lift", fontsize=16)
309-
plt.xlabel("decile", fontsize=16)
312+
plt.ylabel("Lift", fontsize=15)
313+
plt.xlabel("Decile", fontsize=15)
310314
ax.set_xticks(x_labels)
311315
ax.set_xticklabels(x_labels)
312316

313317
plt.axhline(y=1, color="darkorange", linestyle="--",
314-
xmin=0.05, xmax=0.95, linewidth=3, label="Baseline")
318+
xmin=0.05, xmax=0.95, linewidth=3, label="baseline")
315319

316320
# Legend
317321
ax.legend(loc="upper right")
@@ -354,7 +358,9 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):
354358

355359
# Format axes
356360
ax.set_xlim([0, 100])
357-
ax.set_ylim([0, 105])
361+
ax.set_ylim([0, 100])
362+
plt.ylabel("Gain", fontsize=15)
363+
plt.xlabel("Percentage", fontsize=15)
358364

359365
# Format ticks
360366
ticks_loc_y = ax.get_yticks().tolist()

cobra/evaluation/pigs_tables.py

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
import cobra.utils as utils
99

1010
def generate_pig_tables(basetable: pd.DataFrame,
11-
id_column_name: str,
1211
target_column_name: str,
13-
preprocessed_predictors: list) -> pd.DataFrame:
12+
preprocessed_predictors: list,
13+
id_column_name: str = None) -> pd.DataFrame:
1414
"""Compute PIG tables for all predictors in preprocessed_predictors.
1515
1616
The output is a DataFrame with columns ``variable``, ``label``,
@@ -20,35 +20,41 @@ def generate_pig_tables(basetable: pd.DataFrame,
2020
----------
2121
basetable : pd.DataFrame
2222
Basetable to compute PIG tables from.
23-
id_column_name : str
24-
Name of the basetable column containing the IDs of the basetable rows
25-
(e.g. customernumber).
2623
target_column_name : str
2724
Name of the basetable column containing the target values to predict.
2825
preprocessed_predictors: list
2926
List of basetable column names containing preprocessed predictors.
30-
27+
id_column_name : str, default=None
28+
Name of the basetable column containing the IDs of the basetable rows
29+
(e.g. customernumber).
3130
Returns
3231
-------
3332
pd.DataFrame
3433
DataFrame containing a PIG table for all predictors.
3534
"""
35+
36+
# check if there is an id column and define no_predictor accordingly
37+
if id_column_name == None:
38+
no_predictor = [target_column_name]
39+
else:
40+
no_predictor = [id_column_name, target_column_name]
41+
42+
3643
pigs = [
3744
compute_pig_table(basetable,
3845
column_name,
3946
target_column_name,
40-
id_column_name)
47+
)
4148
for column_name in sorted(preprocessed_predictors)
42-
if column_name not in [id_column_name, target_column_name]
49+
if column_name not in no_predictor
4350
]
44-
output = pd.concat(pigs)
51+
output = pd.concat(pigs, ignore_index=True)
4552
return output
4653

4754

4855
def compute_pig_table(basetable: pd.DataFrame,
4956
predictor_column_name: str,
50-
target_column_name: str,
51-
id_column_name: str) -> pd.DataFrame:
57+
target_column_name: str) -> pd.DataFrame:
5258
"""Compute the PIG table of a given predictor for a given target.
5359
5460
Parameters
@@ -59,8 +65,6 @@ def compute_pig_table(basetable: pd.DataFrame,
5965
Predictor name of which to compute the pig table.
6066
target_column_name : str
6167
Name of the target variable.
62-
id_column_name : str
63-
Name of the id column (used to count population size).
6468
6569
Returns
6670
-------
@@ -70,14 +74,20 @@ def compute_pig_table(basetable: pd.DataFrame,
7074
global_avg_target = basetable[target_column_name].mean()
7175

7276
# group by the binned variable, compute the incidence
73-
# (=mean of the target for the given bin) and compute the bin size
77+
# (= mean of the target for the given bin) and compute the bin size
7478
# (e.g. COUNT(id_column_name)). After that, rename the columns
79+
7580
res = (basetable.groupby(predictor_column_name)
76-
.agg({target_column_name: "mean", id_column_name: "size"})
81+
.agg(
82+
avg_target = (target_column_name, "mean"),
83+
pop_size = (target_column_name, "size")
84+
)
7785
.reset_index()
78-
.rename(columns={predictor_column_name: "label",
79-
target_column_name: "avg_target",
80-
id_column_name: "pop_size"}))
86+
.rename(
87+
columns={predictor_column_name: "label"}
88+
)
89+
)
90+
8191

8292
# add the column name to a variable column
8393
# add the average incidence
@@ -165,9 +175,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
165175
ax.plot(np.nan, "#939598", linewidth=6, label='bin size')
166176

167177
# Set labels & ticks
168-
ax.set_ylabel('incidence' if model_type == "classification" else "mean target value",
178+
ax.set_ylabel('Incidence' if model_type == "classification" else "Mean target value",
169179
fontsize=16)
170-
ax.set_xlabel('{} bins' ''.format(variable), fontsize=16)
180+
ax.set_xlabel("Bins", fontsize=15)
171181
ax.xaxis.set_tick_params(labelsize=14)
172182
plt.setp(ax.get_xticklabels(),
173183
rotation=45, ha="right", rotation_mode="anchor")
@@ -210,13 +220,13 @@ def plot_incidence(pig_tables: pd.DataFrame,
210220
align='center', color="#939598", zorder=1)
211221

212222
# Set labels & ticks
213-
ax2.set_xlabel('{} bins' ''.format(variable), fontsize=16)
223+
ax2.set_xlabel("Bins", fontsize=15)
214224
ax2.xaxis.set_tick_params(rotation=45, labelsize=14)
215225

216226
ax2.yaxis.set_tick_params(labelsize=14)
217227
ax2.yaxis.set_major_formatter(
218228
FuncFormatter(lambda y, _: '{:.1%}'.format(y)))
219-
ax2.set_ylabel('population size', fontsize=16)
229+
ax2.set_ylabel('Population size', fontsize=15)
220230
ax2.tick_params(axis='y', colors="#939598")
221231
ax2.yaxis.label.set_color('#939598')
222232

@@ -229,10 +239,11 @@ def plot_incidence(pig_tables: pd.DataFrame,
229239

230240
# Title & legend
231241
if model_type == "classification":
232-
title = "Incidence plot - " + variable
242+
title = "Incidence plot"
233243
else:
234-
title = "Mean target plot - " + variable
235-
fig.suptitle(title, fontsize=22)
244+
title = "Mean target plot"
245+
fig.suptitle(title, fontsize=20)
246+
plt.title(variable, fontsize=17)
236247
ax.legend(frameon=False, bbox_to_anchor=(0., 1.01, 1., .102),
237248
loc=3, ncol=1, mode="expand", borderaxespad=0.,
238249
prop={"size": 14})

0 commit comments

Comments
 (0)