Source code for bs_ds.glassboxes

# -*- coding: utf-8 -*-

""" A collection of modified tools to visualize the inner workings of model objects, especially CatBoost models."""
# from sklearn.tree import export_graphviz

def make_activations_model(model, idx_layers_to_show=None, verbose=True):
    """Build a Keras model that outputs the intermediate activations of `model`.

    The returned model shares `model`'s input and is meant to be passed to
    plot_activations().

    Args:
        model: Keras image-convolution model.
        idx_layers_to_show: Which layers of `model` to expose.
            - None (default): every Conv2D and MaxPooling2D layer.
            - an int, or a sequence holding one int: that single layer.
            - a sequence of two ints (start, end): `model.layers[start:end]`.
        verbose: If truthy, print the summary of the new model.

    Returns:
        A keras Model whose outputs are the selected layers' outputs.

    Raises:
        ValueError: If `idx_layers_to_show` has any other shape, or if the
            selection contains no layers.
    """
    import keras
    from keras import models
    import numpy as np

    if idx_layers_to_show is None:
        # Exact type match (not isinstance) so subclasses of Conv2D are not
        # picked up, mirroring the original selection rule.
        image_layer_types = (keras.layers.Conv2D, keras.layers.MaxPooling2D)
        layers_to_show = [
            layer for layer in model.layers if type(layer) in image_layer_types
        ]
    else:
        # atleast_1d lets a bare int and a 1- or 2-element sequence share one path.
        indices = np.atleast_1d(idx_layers_to_show)
        if indices.ndim != 1 or indices.size not in (1, 2):
            raise ValueError(
                'idx_layers_to_show must be None, a single index, '
                'or a (start, end) pair of indices.'
            )
        if indices.size == 2:
            idx_start, idx_end = int(indices[0]), int(indices[1])
            layers_to_show = list(model.layers[idx_start:idx_end])
        else:
            layers_to_show = [model.layers[int(indices[0])]]

    if not layers_to_show:
        raise ValueError('No layers were selected for the activations model.')

    layer_outputs = [layer.output for layer in layers_to_show]
    activation_model = models.Model(inputs=model.input, outputs=layer_outputs)

    if verbose:
        # summary() prints by itself and returns None, so it is not wrapped in print().
        activation_model.summary()

    return activation_model
def plot_activations(activations_model, img_tensor, n_cols=16, process=True, colormap='viridis'):
    """Plot every channel of every output of an activations model for one image.

    Args:
        activations_model: Model produced by make_activations_model().
        img_tensor: Input passed to `activations_model.predict`; only the
            first sample of the batch is plotted.
            NOTE(review): each output is indexed as [0, :, :, channel], so
            4-D channels-last outputs are assumed - confirm for your model.
        n_cols: Number of subplot columns in the figure.
        process: If truthy, standardise each channel (z-score), rescale it to
            mean 128 / std 64 and clip to uint8 before plotting.
        colormap: Matplotlib colormap name used for every channel image.

    Returns:
        (fig, axes): the matplotlib figure and a 2-D array of axes.
    """
    import math
    import matplotlib.pyplot as plt
    import numpy as np

    activations = activations_model.predict(img_tensor)
    # A single-output model returns a bare array instead of a list of arrays.
    if not isinstance(activations, (list, tuple)):
        activations = [activations]

    # Titles must follow the model *outputs*. `activations_model.layers` also
    # contains the input layer and any layer that is not an output, so it is
    # only used as a fallback when output names are unavailable.
    layer_names = list(getattr(activations_model, 'output_names', None) or [])
    if len(layer_names) != len(activations):
        layer_names = [layer.name for layer in activations_model.layers]

    # shape[-1] == number of channels produced by that output
    total_features = sum(a.shape[-1] for a in activations)
    n_rows = max(1, math.ceil(total_features / n_cols))

    # squeeze=False keeps `axes` 2-D even when the grid has a single row/column.
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                             figsize=(n_cols, n_rows * 1.2), squeeze=False)

    iteration = 0
    for layer_n, layer_activation in enumerate(activations):
        n_channels = layer_activation.shape[-1]

        for ch_idx in range(n_channels):
            row, column = divmod(iteration, n_cols)
            ax = axes[row, column]

            channel_image = layer_activation[0, :, :, ch_idx]

            if process:
                # Work on a float copy so the predicted activations are not modified.
                channel_image = np.array(channel_image, dtype=float)
                channel_image -= channel_image.mean()
                channel_std = channel_image.std()
                if channel_std > 0:
                    # Constant (e.g. dead) channels have std 0; skip the division.
                    channel_image /= channel_std
                channel_image *= 64
                channel_image += 128
                channel_image = np.clip(channel_image, 0, 255).astype('uint8')

            ax.imshow(channel_image, aspect='auto', cmap=colormap)

            # Remove x and y ticks
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])

            # Label only the first channel of each output
            if ch_idx == 0 and layer_n < len(layer_names):
                ax.set_title(layer_names[layer_n], fontsize=10)

            iteration += 1

    # Hide the unused cells at the end of the grid.
    for unused_ax in axes.flat[iteration:]:
        unused_ax.axis('off')

    fig.subplots_adjust(hspace=1.25)
    plt.show()

    return fig, axes
def plot_auc_roc_curve(y_test, y_test_pred):
    """Plot the ROC curve of a binary classifier, labelled with its AUC.

    Args:
        y_test: True binary labels.
        y_test_pred: Predicted scores. A 2-D array is treated as
            per-class scores and column 1 is used (the positive class in
            a predict_proba-style layout); a 1-D array is used as the
            positive-class scores directly.

    Returns:
        None. The figure is shown with plt.show().
    """
    from sklearn.metrics import roc_curve, roc_auc_score
    import matplotlib.pyplot as plt
    import numpy as np

    scores = np.asarray(y_test_pred)
    positive_scores = scores[:, 1] if scores.ndim == 2 else scores

    # Named `auc_score` so it cannot shadow (or be mistaken for) sklearn's auc().
    auc_score = roc_auc_score(y_test, positive_scores)
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, positive_scores)

    plt.plot(false_pos_rate, true_pos_rate,
             label=f"AUC for Classifier:\n{round(auc_score,2)}")

    # Diagonal = performance of a random classifier
    plt.plot([0, 1], [0, 1], lw=2, linestyle='--')
    plt.xlim([-0.01, 1.0])
    plt.ylim([0.0, 1.05])

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.show()
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=None, print_matrix=True):
    """Plot a confusion matrix as an annotated heat map.

    Args:
        cm: Square confusion matrix (rows = true labels, columns = predicted
            labels), e.g. the output of sklearn's confusion_matrix.
        classes: Class names used as tick labels, in matrix order.
        normalize: If True, divide every row by its total so each row shows
            proportions instead of counts.
        title: Title of the plot.
        cmap: Matplotlib colormap; defaults to "Blues".
        print_matrix: If True (default), also print the matrix that is
            plotted, preceded by a one-line description.

    Returns:
        None. The figure is shown with plt.show().
    """
    import itertools
    import numpy as np
    import matplotlib.pyplot as plt

    cm = np.asarray(cm)

    if cmap is None:
        cmap = plt.get_cmap("Blues")

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any samples stay at 0 instead of turning into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        description = "Normalized confusion matrix"
    else:
        description = 'Confusion matrix, without normalization'

    if print_matrix:
        print(description)
        print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; anything else gets two decimals
    # ('d' would raise on a float matrix).
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
# Display graphviz tree
def viz_tree(tree_object):
    '''Render a fitted scikit-learn decision tree as a PNG image.

    The tree is exported to Graphviz dot format, converted to PNG with
    pydotplus and wrapped in an IPython Image for notebook display.

    Args:
        tree_object: Fitted scikit-learn decision tree estimator.

    Returns:
        IPython.display.Image holding the PNG rendering of the tree.
    '''
    # sklearn.externals.six is no longer shipped with scikit-learn;
    # the standard-library StringIO is a drop-in replacement here.
    from io import StringIO
    from IPython.display import Image
    from sklearn.tree import export_graphviz
    import pydotplus

    dot_data = StringIO()
    export_graphviz(tree_object, out_file=dot_data, filled=True, rounded=True,
                    special_characters=True)

    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    tree_viz = Image(graph.create_png())
    return tree_viz
def plot_cat_feature_importances(cb_clf):
    """Draw a horizontal bar chart of a fitted CatBoost classifier's feature importances.

    Args:
        cb_clf: Fitted model exposing `feature_names_` and
            `feature_importances_`.

    Returns:
        pandas Series of importance scores indexed by feature name, in the
        model's own order (only the plotted copy is sorted).
    """
    import pandas as pd

    names, scores = cb_clf.feature_names_, cb_clf.feature_importances_
    importances = pd.Series(scores, index=names)

    # Sort ascending for the chart; the returned Series is left untouched.
    ranked = importances.sort_values()
    ranked.plot(kind='barh')

    return importances
class Clock(object):
    """Wall-clock timer for a process and its sub-steps ("laps"), in local time.

    Typical use: `tic()` starts the timer, `lap()` records the time elapsed
    since the previous lap (or since `tic()`), and `toc()` stops the timer,
    appends a TOTAL row and, by default, displays a summary table.

    Args:
        display_final_time_as_minutes: If True (default) the total duration is
            reported as "<m> min, <s> sec"; otherwise as total seconds.
        verbose: 0 silences the status lines printed by tic/lap/toc; any value
            above 0 (default 2) prints them. The summary table shown by
            `toc(summary=True)` does not depend on `verbose`.

    All records are kept in `_lap_times_list_`, a list of rows whose first row
    is the header ['Lap #', 'Start Time', 'Stop Time', 'Label', 'Duration'].
    """

    def get_time(self, local=True):
        """Return the current time as a timezone-aware datetime.

        Args:
            local: If truthy (default) the time is converted to the clock's
                timezone (`_timezone_`); otherwise it is returned in UTC.
        """
        from datetime import datetime, timezone

        now_utc = datetime.now(timezone.utc)
        if local:
            return now_utc.astimezone(self._timezone_)
        return now_utc

    def __init__(self, display_final_time_as_minutes=True, verbose=2):
        from tzlocal import get_localzone

        self._strformat_ = "%m/%d/%y - %I:%M:%S %p"
        self._timezone_ = get_localzone()
        self._verbose_ = verbose
        self._display_as_minutes_ = display_final_time_as_minutes
        self._decorate_ = '--- '

        # Filled in by tic(); None means the clock has not been started yet.
        self._start_time_ = None
        self._prior_start_time_ = None

        # Filled in by lap() / toc().
        self._lap_label_ = None
        self._lap_end_time_ = None
        self._lap_duration_ = None

    def _require_started(self, caller):
        """Raise a clear error when `caller` is used before tic()."""
        from datetime import datetime

        if not isinstance(getattr(self, '_start_time_', None), datetime):
            raise RuntimeError(
                f'Clock.{caller}() was called before Clock.tic(); start the clock first.'
            )

    def mark_lap_list(self, label=None):
        """Append the current lap's record to `_lap_times_list_`.

        Used internally by lap(). Each row is
        [lap number, lap start time, lap stop time, label, duration].
        """
        if label is None:
            label = '--'

        duration = self._lap_duration_.total_seconds()
        self._lap_times_list_.append([
            self._lap_counter_,                                    # Lap #
            self._prior_start_time_.strftime(self._strformat_),    # this lap's start time
            self._lap_end_time_,                                   # stop time (already formatted)
            label,                                                 # label passed to .lap()
            f'{duration:.3f} sec',                                 # lap duration
        ])

    def tic(self, label=None):
        "Start the timer, reset the lap records and (if verbose) print the start time."
        self._start_time_ = self.get_time()
        self._start_label_ = label
        self._prior_start_time_ = self._start_time_

        # Initiate lap counter and list (header row first)
        self._lap_counter_ = 0
        self._lap_times_list_ = [['Lap #', 'Start Time', 'Stop Time', 'Label', 'Duration']]

        self._decorate_ = '--- '
        decorate = self._decorate_
        base_msg = f'{decorate}CLOCK STARTED @: {self._start_time_.strftime(self._strformat_):>25}'

        if label is None:
            display_msg = base_msg + ' ' + decorate
        else:
            spacer = ' '
            display_msg = base_msg + f'{spacer:10} Label: {label:10} {decorate}'

        if self._verbose_ > 0:
            print(display_msg)

    def toc(self, label=None, summary=True):
        """Stop the timer, append the TOTAL row and report the total duration.

        Args:
            label: Label stored in the TOTAL row ('--' when omitted).
            summary: If truthy, display the summary table via summary().

        Raises:
            RuntimeError: If the clock was never started with tic().
        """
        if label is None:
            label = '--'
        self._require_started('toc')

        end_time = self.get_time()
        self._final_end_time_ = end_time
        self._total_time_ = end_time - self._start_time_

        self._lap_counter_ += 1
        self._lap_label_ = label
        self._lap_end_time_ = end_time.strftime(self._strformat_)
        self._lap_duration_ = end_time - self._prior_start_time_

        decorate = self._decorate_
        total_seconds = self._total_time_.total_seconds()

        if self._display_as_minutes_:
            total_mins = int(total_seconds // 60)
            sec_remain = total_seconds % 60
            total_time_to_display = f'{total_mins} min, {sec_remain:.3f} sec'
        else:
            # Report the full elapsed seconds; taking `% 60` here would
            # silently drop every whole minute from the total.
            total_time_to_display = f'{round(total_seconds, 3)} sec'

        # Append Summary Line
        self._lap_times_list_.append([
            'TOTAL',
            self._start_time_.strftime(self._strformat_),
            self._final_end_time_.strftime(self._strformat_),
            label,
            total_time_to_display,
        ])

        if self._verbose_ > 0:
            print(f'--- TOTAL DURATION = {total_time_to_display:>15} {decorate}')

        if summary:
            self.summary()

    def lap(self, label=None):
        """Record the end time, duration and label of the current lap.

        The lap runs from the previous lap's end (or from tic()) until now.
        The record is stored through mark_lap_list(); a status line is printed
        when verbose > 0.

        Raises:
            RuntimeError: If the clock was never started with tic().
        """
        if label is None:
            label = '--'
        self._require_started('lap')

        end_time = self.get_time()

        # Update the lap attributes and counter
        self._lap_label_ = label
        self._lap_end_time_ = end_time.strftime(self._strformat_)
        self._lap_counter_ += 1
        self._lap_duration_ = end_time - self._prior_start_time_

        # Now update the record
        self.mark_lap_list(label=label)

        # The next lap starts where this one ended
        self._prior_start_time_ = end_time

        if self._verbose_ > 0:
            spacer = ' '
            lap_seconds = self._lap_duration_.total_seconds()
            print(f' - Lap # {self._lap_counter_} @: '
                  f'{self._lap_end_time_:>25} {spacer:5} Dur: {lap_seconds:.3f} sec.'
                  f'{spacer:5}Label: {self._lap_label_:20}')

    def summary(self):
        """Display a styled summary table of the recorded laps (without stop times)."""
        from bs_ds import list2df
        from IPython.display import display

        df_lap_times = list2df(self._lap_times_list_)
        # Selecting the wanted columns also drops 'Stop Time'.
        df_lap_times = df_lap_times[['Lap #', 'Start Time', 'Duration', 'Label']]

        styler = df_lap_times.style
        # Styler.hide_index() was removed in pandas 2.0 in favour of hide(axis='index').
        if hasattr(styler, 'hide'):
            styler = styler.hide(axis='index')
        else:
            styler = styler.hide_index()

        styler = styler.set_caption('Summary Table of Clocked Processes').set_properties(
            subset=['Start Time', 'Duration'], **{'width': '140px'})
        display(styler.set_table_styles(
            [dict(selector='table, th', props=[('text-align', 'center')])]))