#exploring our data and visualization
# train and create a linear regression model
#Evaluate our model
#Make prediction
#Calculate RMSE
#What if we consider more/fewer features?
#Followed along from https://www.youtube.com/watch?v=URLwFGwCBb0&t=3835s
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the dataset from kc_house_data.csv into the `house` DataFrame.
house = pd.read_csv('kc_house_data.csv')
# Preview the first 5 rows of the dataset.
house.head()
# Print the column names, non-null counts and dtypes.
house.info()
# Summary statistics (count, mean, std, min, quartiles, max) of the columns.
house.describe()
# List the column names.
house.columns

# Scatter plot of price against living area, drawn on an enlarged figure.
plt.figure(figsize=(10, 6))
plt.scatter(house['sqft_living'], house['price'])
plt.xlabel('sqft of house')
plt.ylabel('price of house')

# Same relationship with a fitted regression line.
# lmplot is a figure-level function: it creates its own figure, so a preceding
# plt.figure(figsize=...) would only leave an empty figure behind. The size is
# set through height/aspect instead (6 in tall, 10 in wide).
# x and y must be passed by keyword; recent seaborn releases reject them as
# positional arguments.
sns.lmplot(x='sqft_living', y='price', data=house, height=6, aspect=10 / 6)

# Heatmap of the pairwise correlations, on its own figure so it does not draw
# over the previous plot.
# Only numeric columns are correlated: pandas >= 2.0 raises if the frame
# contains non-numeric columns (NOTE(review): the CSV presumably has a string
# date column -- confirm against house.info()).
plt.figure(figsize=(10, 6))
sns.heatmap(house.select_dtypes(include='number').corr())

# Distribution of the sale price (density histogram with a KDE curve).
# histplot replaces the deprecated sns.distplot.
plt.figure(figsize=(10, 6))
sns.histplot(house['price'], kde=True, stat='density', color='red')

# Box plot of price per zipcode, again on a separate figure.
plt.figure(figsize=(10, 6))
sns.boxplot(x='zipcode', y='price', data=house)