
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from sklearn.datasets import make_classification

# Load the labelled points from disk. The plot below reads three columns:
# x1 and x2 as the marker position, label as the marker colour.
df = pd.read_csv('./basic_labeling.csv')
# Synthetic alternative to the CSV, kept for reference:
# X, y = make_classification(n_samples=1000, n_features=2, n_classes=2, n_repeated=0, n_redundant=0, n_informative=2, n_clusters_per_class=1)
# df = pd.DataFrame(X, columns=['x1', 'x2'])
# df['label'] = y


# First look at the raw data: one marker per row, coloured by its label.
plt.scatter(x=df['x1'], y=df['x2'], c=df['label'], cmap='tab10')
plt.show()


# Feature columns are selected by name so the classifier is fitted, scored and
# drawn on exactly the columns the scatter plots use (x1 horizontal, x2
# vertical), independent of where those columns sit in the CSV.
features = ['x1', 'x2']

# Keep 20% of the rows aside for evaluation. No random_state is given, so the
# split, and with it every number and figure below, varies from run to run.
train, test = train_test_split(df, test_size=0.2)

model = LogisticRegression()
model.fit(train[features], train['label'])

# Fitted weights and bias of the linear decision function.
print(model.coef_)
print(model.intercept_)

predictions = model.predict(test[features])

# Held-out performance: true labels against predicted labels.
ConfusionMatrixDisplay.from_predictions(test['label'], predictions, cmap='summer')
plt.show()

# Closed form of the boundary for a two-class model: the fitted decision
# function is w1*x1 + w2*x2 + b, and the boundary is where it equals zero,
# i.e. x2 = m*x1 + c with m = -w1/w2 and c = -b/w2.
# With more than two classes coef_ and intercept_ hold one row per class, so a
# single line no longer describes the boundaries; only row 0 is read here.
# NOTE: if w2 is 0 the boundary is vertical and m, c are not finite.
b = model.intercept_[0]
w1, w2 = model.coef_[0]

c = -b / w2
m = -w1 / w2

# Horizontal range over which the analytic line can be evaluated.
xs = np.linspace(df['x1'].min(), df['x1'].max(), 100)

fig, ax = plt.subplots(figsize=(5, 5))
# Shade the plane by predicted class, then draw the data on top of it.
DecisionBoundaryDisplay.from_estimator(
    model,
    df[features],
    response_method='predict',
    cmap='tab10',
    alpha=0.25,
    grid_resolution=200,
    ax=ax,
)
ax.scatter(df['x1'], df['x2'], c=df['label'], alpha=0.75, cmap='tab10', edgecolor='black')
# Optional overlay of the analytic line (two-class model only):
# ax.plot(xs, m * xs + c, '--', lw=3, color='black')
ax.set_xlabel(r'$x_1$', fontsize=20)
ax.set_ylabel(r'$x_2$', fontsize=20)
fig.tight_layout()
# '#FFFFFF00' is white with zero alpha, i.e. a transparent figure background.
fig.savefig('decision_boundary.png', facecolor='#FFFFFF00')
plt.show()

