Brazilian E-Commerce Public Dataset by Olist¶

Welcome! This is a Brazilian e-commerce public dataset of orders made at Olist Store. The dataset contains information on 100k orders placed between 2016 and 2018 at multiple marketplaces in Brazil. Its features allow viewing an order from multiple dimensions: from order status, price, payment, and freight performance to customer location, product attributes, and finally reviews written by customers. We also released a geolocation dataset that relates Brazilian zip codes to lat/lng coordinates.

This is real commercial data; it has been anonymised, and references to the companies and partners in the review text have been replaced with the names of Game of Thrones great houses.

Context

This dataset was generously provided by Olist, the largest department store in Brazilian marketplaces. Olist connects small businesses from all over Brazil to channels without hassle and with a single contract. Those merchants are able to sell their products through the Olist Store and ship them directly to the customers using Olist logistics partners. See more on our website: www.olist.com

After a customer purchases the product from Olist Store, a seller is notified to fulfill that order. Once the customer receives the product, or the estimated delivery date passes, the customer receives a satisfaction survey by email where they can rate the purchase experience and write some comments.

Attention

  1. An order might have multiple items.
  2. Each item might be fulfilled by a distinct seller.
  3. All text identifying stores and partners was replaced by the names of Game of Thrones great houses.

Example of a product listing on a marketplace

image.png

Data Schema

The data is divided into multiple datasets for better understanding and organization. Please refer to the following data schema when working with it:

image.png

Inspiration

Here is some inspiration for possible outcomes from this dataset.

  1. NLP: This dataset offers a rich environment for parsing review text across its multiple dimensions.
  2. Clustering: Some customers didn't write a review. But why are they happy or mad?
  3. Sales Prediction: With purchase date information you'll be able to predict future sales.
  4. Delivery Performance: You will also be able to work through delivery performance and find ways to optimize delivery times.
  5. Product Quality: Enjoy discovering the product categories that are more prone to customer dissatisfaction.
  6. Feature Engineering: Create features from this rich dataset or attach some external public information to it.

Acknowledgements

Thanks to Olist for releasing this dataset.

Objectives of this project¶

This project demonstrates the process of importing data into a relational PostgreSQL database. Olist is a suitable dataset for this purpose because it contains many tables that are related to each other. Moreover, their relationships are clearly documented by the provider, and the resulting database can also be used for other projects. In detail, the objectives include:

  • Establishing a schema to store table structure
  • Creating the tables in a specific order so that their relationships can be maintained
  • Choosing proper data types for all variables
  • Setting primary and foreign keys of the tables and implementing their constraints on the tables
  • Importing data to all the tables and dealing with problems arising during the importing process
  • Checking schema information to validate the established structure
  • Showing the Entity Relationship Diagram (ERD) to visualize the relationship among tables

The code is prepared in DBeaver and presented in a Jupyter notebook, a convenient medium for including my thoughts during the importing process. It is also a powerful tool for storing query results and presenting them as tables. More conveniently, Python can be used to visualize the extracted data to assist the investigation and give the user more insights.

First, the required Python packages are imported.

In [1]:
# manipulating data
import sqlalchemy
import pandas as pd
import warnings
import json

# Plotting and Visualization
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import chart_studio.plotly as py
import cufflinks as cf

# Interactive charts
from ipywidgets import interact, fixed, IntSlider
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import display, HTML, Image
from urllib.request import urlopen

# Options
cached_data = {} 
init_notebook_mode(connected = True)
cf.go_offline()
warnings.filterwarnings(action = 'ignore')
pd.options.display.float_format = '{:,.4f}'.format
with urlopen('https://raw.githubusercontent.com/codeforamerica/click_that_hood/master/public/data/brazil-states.geojson') as response:
    Brazil = json.load(response) # JavaScript Object Notation (GeoJSON of Brazilian states)

Next, a connection to the PostgreSQL database is established using sqlalchemy. A database called olist was created beforehand, and the connection is made directly to this database.

In [2]:
# Setup PostgreSQL database connection
engine = sqlalchemy.create_engine('postgresql://postgres:3ai6ua27hi2h@localhost:5432/olist')
%load_ext sql
%sql postgresql://postgres:3ai6ua27hi2h@localhost:5432/olist

Then I can interact with the database in the Jupyter notebook as naturally as in DBeaver.

Establish table schema¶

A database schema is used to store the table structure that will be built during the data importing process. A database can have many such schemata, so it can hold many different table structures.

In [3]:
%%sql
CREATE SCHEMA IF NOT EXISTS olist;
SET SCHEMA 'olist';
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
Out[3]:
[]

The schema olist is set as the default schema for this session. To make re-executing the SQL code in this notebook convenient, all to-be-generated tables are dropped in the reverse of the order in which they will be created, so that re-execution does not run into any problems.

In [4]:
%%sql
DROP TABLE IF EXISTS order_items;
DROP TABLE IF EXISTS products;
DROP TABLE IF EXISTS category_names_english;
DROP TABLE IF EXISTS order_payments;
DROP TABLE IF EXISTS order_reviews;
DROP TABLE IF EXISTS orders;
DROP TABLE IF EXISTS sellers;
DROP TABLE IF EXISTS customers;
DROP TABLE IF EXISTS geolocation;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Out[4]:
[]

image.png

The original data published by Olist is stored in separate csv files, as shown in the picture above. There is an extra dataset called product_category_name_translation containing English translations for the Brazilian product category names. The other sets match the structure given in the description. Next, a schema and its tables are created to import these datasets.

Importing data to olist database¶

Tables in a schema are connected following a certain structure. This poses a problem when importing data: if two tables are connected via a key, the key has to exist before the connection can be made. Therefore, tables have to be created and data imported in a specific order. The rule is simple: tables with only primary keys are created first, and tables with foreign keys are created later, since a primary key must exist before it can be referenced as a foreign key in another table. In this project, the tables are connected following the structure below.

image.png

With this many tables, the complications created by these rules escalate quickly. In this project, I identify the important nodes in this network of tables and import them sequentially. The import order can be briefly summarized as

geolocation $\rightarrow$ customers/sellers $\rightarrow$ orders $\rightarrow$ order_reviews/order_payments $\rightarrow$ category_names_english $\rightarrow$ products $\rightarrow$ order_items

The right arrow ($\rightarrow$) indicates the order of creation, while the slash (/) indicates that either table can be created first; the order does not matter.

The intuition is quite simple: the three important nodes in this network are geolocation, orders, and order_items, due to the connections they have:

  • The table geolocation does not have any foreign key, but it is connected to two other tables via its primary key zip_code_prefix, so it is created first
  • The table orders has only one foreign key and connects to three other tables via its primary key, so it has to be created before those tables can be
  • The table order_items does not have a primary key and contains only foreign keys, so it is created last
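The ordering rule above amounts to a topological sort over the foreign-key dependencies. As a minimal sketch (an illustration only, not part of the import pipeline), the dependency map below is transcribed from the schema diagram and ordered with the standard library's `graphlib` (Python 3.9+):

```python
from graphlib import TopologicalSorter

# table -> set of tables its foreign keys reference,
# transcribed from the schema diagram
deps = {
    "geolocation": set(),
    "customers": set(),
    "sellers": set(),
    "category_names_english": set(),
    "orders": {"customers"},
    "order_reviews": {"orders"},
    "order_payments": {"orders"},
    "products": {"category_names_english"},
    "order_items": {"orders", "products", "sellers"},
}

# Referenced tables come before the tables that reference them
creation_order = list(TopologicalSorter(deps).static_order())
print(creation_order)                   # order of creation
print(list(reversed(creation_order)))   # a valid order for the DROP statements
```

Reversing a valid creation order always yields a valid drop order, which is exactly the convention used in the DROP TABLE cell above.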

Create table geolocation¶

The table geolocation is the first important node, so it is created first.

1.png

In [5]:
%%sql
DROP TABLE IF EXISTS geolocation;
CREATE TABLE IF NOT EXISTS geolocation (
	zip_code_prefix INTEGER NOT NULL,
	latitude NUMERIC,
	longitude NUMERIC,
	city TEXT,
	state TEXT
);
COPY geolocation FROM 'E:/Data/Olist/olist_geolocation_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM geolocation ORDER BY zip_code_prefix LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
1000163 rows affected.
10 rows affected.
Out[5]:
zip_code_prefix latitude longitude city state
1001 -23.549779299469115 -46.6339571183853 são paulo SP
1001 -23.551336655288804 -46.63402699777831 sao paulo SP
1001 -23.550497706907514 -46.63433817805407 sao paulo SP
1001 -23.551336655288804 -46.63402699777831 sao paulo SP
1001 -23.54929199999999 -46.633559478233785 sao paulo SP
1001 -23.55064182209015 -46.634409790322515 sao paulo SP
1001 -23.551426655288804 -46.63407394670785 sao paulo SP
1001 -23.54969829946912 -46.63390859285005 sao paulo SP
1001 -23.550497706907514 -46.63433817805407 sao paulo SP
1001 -23.550497706907514 -46.63433817805407 sao paulo SP

zip_code_prefix should serve as the primary key for this table, but in the given set it has duplicate values for the same zip code prefix (as shown in the query result), so it cannot be set as the primary key. This might be caused by the publisher's manipulations to protect user privacy. For this reason, the table will not be connected to other tables, since joining on it would cause duplications.
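The duplicate check behind that decision can be sketched in a few lines of Python: count how many rows share each candidate key value. The tuples below are illustrative values in the spirit of the query result above, not rows from the real file.

```python
from collections import Counter

# (zip_code_prefix, latitude, longitude) -- illustrative rows only
rows = [
    (1001, -23.5498, -46.6340),
    (1001, -23.5513, -46.6340),
    (1001, -23.5505, -46.6343),
    (1046, -23.5463, -46.6441),
]

counts = Counter(zip_code for zip_code, _, _ in rows)
duplicated = {k: n for k, n in counts.items() if n > 1}
print(duplicated)  # any non-empty result rules out zip_code_prefix as a primary key
```

The equivalent check in SQL would group by zip_code_prefix and keep only the groups with COUNT(*) > 1.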

Next, two other tables are created, but their column zip_code_prefix will not be set as a foreign key for the reason discussed above.

Create table customers¶

In this table, customer_id is set as the primary key, as it is used to connect to other tables as shown in the given diagram.

In [6]:
%%sql
DROP TABLE IF EXISTS customers;
CREATE TABLE IF NOT EXISTS customers (
	customer_id TEXT PRIMARY KEY NOT NULL,
	customer_unique_id TEXT,
	zip_code_prefix INTEGER NOT NULL,
	city TEXT,
	state TEXT
);
COPY customers FROM 'E:/Data/Olist/olist_customers_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM customers LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
99441 rows affected.
10 rows affected.
Out[6]:
customer_id customer_unique_id zip_code_prefix city state
06b8999e2fba1a1fbc88172c00ba8bc7 861eff4711a542e4b93843c6dd7febb0 14409 franca SP
18955e83d337fd6b2def6b18a428ac77 290c77bc529b7ac935b93aa66c333dc3 9790 sao bernardo do campo SP
4e7b3e00288586ebd08712fdd0374a03 060e732b5b29e8181a18229c7b0b2b5e 1151 sao paulo SP
b2b6027bc5c5109e529d4dc6358b12c3 259dac757896d24d7702b9acbbff3f3c 8775 mogi das cruzes SP
4f2d8ab171c80ec8364f7c12e35b23ad 345ecd01c38d18a9036ed96c73b8d066 13056 campinas SP
879864dab9bc3047522c92c82e1212b8 4c93744516667ad3b8f1fb645a3116a4 89254 jaragua do sul SC
fd826e7cf63160e536e0908c76c3f441 addec96d2e059c80c30fe6871d30d177 4534 sao paulo SP
5e274e7a0c3809e14aba7ad5aae0d407 57b2a98a409812fe9618067b6b8ebe4f 35182 timoteo MG
5adf08e34b2e993982a47070956c5c65 1175e95fb47ddff9de6b2b06188f7e0d 81560 curitiba PR
4b7139f34592b3a31687243a302fa75b 9afe194fb833f79e300e37e580171f22 30575 belo horizonte MG

Creating table sellers¶

Similarly, the table sellers is created with seller_id as the primary key, which it uses to connect to the table order_items.

In [7]:
%%sql
DROP TABLE IF EXISTS sellers;
CREATE TABLE IF NOT EXISTS sellers (
	seller_id TEXT PRIMARY KEY NOT NULL,
	zip_code_prefix INTEGER,
	city TEXT,
	state TEXT
);
COPY sellers FROM 'E:/Data/Olist/olist_sellers_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM sellers LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
3095 rows affected.
10 rows affected.
Out[7]:
seller_id zip_code_prefix city state
3442f8959a84dea7ee197c632cb2df15 13023 campinas SP
d1b65fc7debc3361ea86b5f14c68d2e2 13844 mogi guacu SP
ce3ad9de960102d0677a81f5d0bb7b2d 20031 rio de janeiro RJ
c0f3eea2e14555b6faeea3dd58c1b1c3 4195 sao paulo SP
51a04a8a6bdcb23deccc82b0b80742cf 12914 braganca paulista SP
c240c4061717ac1806ae6ee72be3533b 20920 rio de janeiro RJ
e49c26c3edfa46d227d5121a6b6e4d37 55325 brejao PE
1b938a7ec6ac5061a66a3766e0e75f90 16304 penapolis SP
768a86e36ad6aae3d03ee3c6433d61df 1529 sao paulo SP
ccc4bbb5f32a6ab2b7066a4130f114e3 80310 curitiba PR

Creating table orders¶

The table orders is the next important node in the network. Its primary key is used to connect to three other tables, while the foreign key it needs to reference the table customers is already available. Hence, it is created before those three tables.

2.png

In [8]:
%%sql
DROP TABLE IF EXISTS orders;
CREATE TABLE IF NOT EXISTS orders (
	order_id TEXT PRIMARY KEY NOT NULL,
	customer_id TEXT NOT NULL,
	status TEXT,
	purchase_timestamp TIMESTAMP,
	approval_timestamp TIMESTAMP,
	delivered_carrier_date TIMESTAMP,
	delivered_customer_date TIMESTAMP,
	estimated_delivery_date TIMESTAMP,
	CONSTRAINT fk_customer FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
);
COPY orders FROM 'E:/Data/Olist/olist_orders_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM orders ORDER BY purchase_timestamp LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
99441 rows affected.
10 rows affected.
Out[8]:
order_id customer_id status purchase_timestamp approval_timestamp delivered_carrier_date delivered_customer_date estimated_delivery_date
2e7a8482f6fb09756ca50c10d7bfc047 08c5351a6aca1c1589a38f244edeee9d shipped 2016-09-04 21:15:19 2016-10-07 13:18:03 2016-10-18 13:14:51 None 2016-10-20 00:00:00
e5fa5a7210941f7d56d0208e4e071d35 683c54fc24d40ee9f8a6fc179fd9856c canceled 2016-09-05 00:15:34 2016-10-07 13:17:15 None None 2016-10-28 00:00:00
809a282bbd5dbcabb6f2f724fca862ec 622e13439d6b5a0b486c435618b2679e canceled 2016-09-13 15:24:19 2016-10-07 13:16:46 None None 2016-09-30 00:00:00
bfbd0f9bdef84302105ad712db648a6c 86dc2ffce2dfff336de2f386a786e574 delivered 2016-09-15 12:16:38 2016-09-15 12:16:38 2016-11-07 17:11:53 2016-11-09 07:47:38 2016-10-04 00:00:00
71303d7e93b399f5bcd537d124c0bcfa b106b360fe2ef8849fbbd056f777b4d5 canceled 2016-10-02 22:07:52 2016-10-06 15:50:56 None None 2016-10-25 00:00:00
3b697a20d9e427646d92567910af6d57 355077684019f7f60a031656bd7262b8 delivered 2016-10-03 09:44:50 2016-10-06 15:50:54 2016-10-23 14:02:13 2016-10-26 14:02:13 2016-10-27 00:00:00
be5bc2f0da14d8071e2d45451ad119d9 7ec40b22510fdbea1b08921dd39e63d8 delivered 2016-10-03 16:56:50 2016-10-06 16:03:44 2016-10-21 16:33:46 2016-10-27 18:19:38 2016-11-07 00:00:00
65d1e226dfaeb8cdc42f665422522d14 70fc57eeae292675927697fe03ad3ff5 canceled 2016-10-03 21:01:41 2016-10-04 10:18:57 2016-10-25 12:14:28 2016-11-08 10:58:34 2016-11-25 00:00:00
a41c8759fbe7aab36ea07e038b2d4465 6f989332712d3222b6571b1cf5b835ce delivered 2016-10-03 21:13:36 2016-10-05 03:11:49 2016-10-25 11:57:59 2016-11-03 10:58:07 2016-11-29 00:00:00
d207cc272675637bfed0062edffd0818 b8cf418e97ae795672d326288dfab7a7 delivered 2016-10-03 22:06:03 2016-10-04 10:28:07 2016-10-21 14:23:37 2016-10-31 11:07:42 2016-11-23 00:00:00

The next two tables, order_reviews and order_payments, can be created in either order because they only need order_id as a foreign key and do not need a primary key of their own.
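What these foreign-key constraints enforce can be sketched as a simple membership check: every order_id appearing in a child table must already exist in orders. The ids below are made up for illustration (the real keys are 32-character hashes).

```python
# Hypothetical ids standing in for the real hash-like keys
orders = {"o1", "o2", "o3"}
order_payments = [("o1", 99.33), ("o2", 24.39), ("o9", 65.71)]

# Rows whose order_id is missing from orders would make the COPY
# fail with a foreign-key violation
orphans = [order_id for order_id, _ in order_payments if order_id not in orders]
print(orphans)
```

This is also why orders must be fully populated before either child table is loaded.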

Creating table order_reviews¶

The structure of the query is similar, except for the additional constraint that assigns the column order_id as the foreign key.

In [9]:
%%sql
DROP TABLE IF EXISTS order_reviews;
CREATE TABLE IF NOT EXISTS order_reviews (
	review_id TEXT NOT NULL,
	order_id TEXT NOT NULL,
	rating INTEGER,
	review_title TEXT,
	review_content TEXT,
	creation_timestamp TIMESTAMP,
	answer_timestamp TIMESTAMP,
	CONSTRAINT fk_reviews FOREIGN KEY (order_id) REFERENCES orders(order_id)
);
COPY order_reviews FROM 'E:/Data/Olist/olist_order_reviews_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM order_reviews ORDER BY creation_timestamp LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
99224 rows affected.
10 rows affected.
Out[9]:
review_id order_id rating review_title review_content creation_timestamp answer_timestamp
69ac6a27fde9855ebeaaecac0f78058b 809a282bbd5dbcabb6f2f724fca862ec 1 None MEU PEDIDO NÃO FOI ENTREGUE E NÃO FOI DADA NENHUMA SATISTAÇÃO 2016-10-02 00:00:00 2016-10-26 12:31:00
6916ca4502d6d3bfd39818759d55d536 bfbd0f9bdef84302105ad712db648a6c 1 None nao recebi o produto e nem resposta da empresa 2016-10-06 00:00:00 2016-10-07 18:32:28
49f695dffa457eaba90d388a5c37e942 e5215415bb6f76fe3b7cb68103a0d1c0 1 None PRODUTO NÃO CHEGOU,E JÁ PASSOU O PRAZO DE ENTREGA 2016-10-09 00:00:00 2016-10-11 14:31:29
b2d5d8db2a841d27a72e4c06c6212368 9aa3197e4887919fde0307fc23601d7a 4 None Só chegou uma parte do pedido ate agora.. 2016-10-15 00:00:00 2016-10-17 21:02:49
743d98b1a4782f0646898fc915ef002a e2144124f98f3bf46939bc5183104041 4 None None 2016-10-15 00:00:00 2016-10-16 03:20:17
53752edb26544dd41c1209f582c9c589 b8b9d7046c083150cb5360b83a8ebb51 5 None O pedido foi entregue antes do prazo pr0metido 2016-10-16 01:00:00 2016-10-16 15:45:11
5d76a01a07627d6eab445af668ac3aef 67418605ab8f9414a2e238bbcdde1312 5 None Muito TOP o produto. Lampadas de ótima qualidade, que iluminam muito bem e consomem quase nada. 2016-10-18 00:00:00 2016-12-26 15:24:14
da520c13fbd4886bc0c1207df180106c d3a84031db6c5de813d5a3a3489712ca 5 None None 2016-10-18 00:00:00 2016-10-19 21:21:25
8e8cc3353ea9bd13e1450d8fa269d474 ed3d575bd3358123b063ee64cdecdef5 5 None Chego dentro do prazo tudo bem embalado 2016-10-18 00:00:00 2016-10-18 20:29:05
b2fc2fac36fdef519925b2f55a95d8e4 85841af0d94e5d0cd3a9c0e42e565b74 5 None None 2016-10-18 00:00:00 2016-12-06 20:48:13

Creating table order_payments¶

order_payments likewise uses order_id as the foreign key to connect to the table orders, so the same kind of constraint is added to the query.

In [10]:
%%sql
DROP TABLE IF EXISTS order_payments;
CREATE TABLE IF NOT EXISTS order_payments (
	order_id TEXT NOT NULL,
	payment_sequential INTEGER,
	payment_type TEXT,
	payment_installments INTEGER,
	payment_value NUMERIC,
	CONSTRAINT fk_payments FOREIGN KEY (order_id) REFERENCES orders(order_id)
);
COPY order_payments FROM 'E:/Data/Olist/olist_order_payments_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM order_payments LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
103886 rows affected.
10 rows affected.
Out[10]:
order_id payment_sequential payment_type payment_installments payment_value
b81ef226f3fe1789b1e8b2acac839d17 1 credit_card 8 99.33
a9810da82917af2d9aefd1278f1dcfa0 1 credit_card 1 24.39
25e8ea4e93396b6fa0d3dd708e76c1bd 1 credit_card 1 65.71
ba78997921bbcdc1373bb41e913ab953 1 credit_card 8 107.78
42fdf880ba16b47b59251dd489d4441a 1 credit_card 2 128.45
298fcdf1f73eb413e4d26d01b25bc1cd 1 credit_card 2 96.12
771ee386b001f06208a7419e4fc1bbd7 1 credit_card 1 81.16
3d7239c394a212faae122962df514ac7 1 credit_card 3 51.84
1f78449c87a54faf9e96e88ba1491fa9 1 credit_card 6 341.09
0573b5e23cbd798006520e1d5b4c6714 1 boleto 1 51.95

Creating table category_names_english¶

category_names_english is an additional table that the publisher added to the dataset to help users translate the category names. It connects to the table products using the primary key product_category. The original set does not provide all the needed translations, so when the table products is created later, it cannot use product_category as the reference for its foreign key. The workaround is to add the missing entries to the csv file manually. The details are given below.

3.png

In [11]:
%%sql
DROP TABLE IF EXISTS category_names_english;
CREATE TABLE IF NOT EXISTS category_names_english (
	product_category TEXT PRIMARY KEY NOT NULL,
	product_category_eng TEXT
);
COPY category_names_english FROM 'E:/Data/Olist/product_category_name_translation.csv' HEADER CSV DELIMITER ',';
SELECT * FROM category_names_english LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
73 rows affected.
10 rows affected.
Out[11]:
product_category product_category_eng
portateis_cozinha_e_preparadores_de_alimentos kitchen_and_food_preparadores_portables
pc_gamer pc_gamer
beleza_saude health_beauty
informatica_acessorios computers_accessories
automotivo auto
cama_mesa_banho bed_bath_table
moveis_decoracao furniture_decor
esporte_lazer sports_leisure
perfumaria perfumery
utilidades_domesticas housewares

The original data does not contain English names for the categories pc_gamer and portateis_cozinha_e_preparadores_de_alimentos, so manual additions are needed.

image.png
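How such missing categories can be found before the foreign key is created may be sketched as a set difference between the two category columns. The toy frames below stand in for the two csv files; the column names follow the original files.

```python
import pandas as pd

# Stand-ins for olist_products_dataset.csv and
# product_category_name_translation.csv (abbreviated, illustrative rows)
products = pd.DataFrame(
    {"product_category_name": ["pc_gamer", "perfumaria", "artes"]}
)
translations = pd.DataFrame({
    "product_category_name": ["perfumaria", "artes"],
    "product_category_name_english": ["perfumery", "arts"],
})

# Categories used by products but absent from the translation file
missing = set(products["product_category_name"]) - set(
    translations["product_category_name"]
)
print(sorted(missing))
```

Each category reported here needs a manually added row in the translation csv before the foreign key on products can be created.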

Create table products¶

With the table category_names_english available, the table products can be created. This table has product_id as its primary key and uses product_category as a foreign key to connect to the table category_names_english.

In [12]:
%%sql
DROP TABLE IF EXISTS products;
CREATE TABLE IF NOT EXISTS products (
	product_id TEXT PRIMARY KEY NOT NULL,
	product_category TEXT,
	name_length INTEGER,
	description_length INTEGER,
	photos_quantity INTEGER,
	weight_g INTEGER,
	length_cm INTEGER,
	height_cm INTEGER,
	width_cm INTEGER,
	CONSTRAINT fk_category_eng FOREIGN KEY (product_category) REFERENCES category_names_english(product_category)
);
COPY products FROM 'E:/Data/Olist/olist_products_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM products LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
32951 rows affected.
10 rows affected.
Out[12]:
product_id product_category name_length description_length photos_quantity weight_g length_cm height_cm width_cm
1e9e8ef04dbcff4541ed26657ea517e5 perfumaria 40 287 1 225 16 10 14
3aa071139cb16b67ca9e5dea641aaa2f artes 44 276 1 1000 30 18 20
96bd76ec8810374ed1b65e291975717f esporte_lazer 46 250 1 154 18 9 15
cef67bcfe19066a932b7673e239eb23d bebes 27 261 1 371 26 4 26
9dc1a7de274444849c219cff195d0b71 utilidades_domesticas 37 402 4 625 20 17 13
41d3672d4792049fa1779bb35283ed13 instrumentos_musicais 60 745 1 200 38 5 11
732bd381ad09e530fe0a5f457d81becb cool_stuff 56 1272 4 18350 70 24 44
2548af3e6e77a690cf3eb6368e9ab61e moveis_decoracao 56 184 2 900 40 8 40
37cc742be07708b53a98702e77a21a02 eletrodomesticos 57 163 1 400 27 13 17
8c92109888e8cdf9d66dc7e463025574 brinquedos 36 1156 1 600 17 10 12

Then the last table order_items can be created.

Create table order_items¶

The table order_items does not have a primary key, but it is connected to three other tables via three different foreign keys. This is why it has to be the last table created: it needs those referenced primary keys to exist beforehand.

In [13]:
%%sql
DROP TABLE IF EXISTS order_items;
CREATE TABLE IF NOT EXISTS order_items (
	order_id TEXT NOT NULL,
	item_id TEXT,
	product_id TEXT NOT NULL,
	seller_id TEXT NOT NULL,
	shipping_limit_date TIMESTAMP,
	price NUMERIC,
	freight_value NUMERIC,
	CONSTRAINT fk_products FOREIGN KEY (product_id) REFERENCES products(product_id),
	CONSTRAINT fk_orders FOREIGN KEY (order_id) REFERENCES orders(order_id),
	CONSTRAINT fk_sellers FOREIGN KEY (seller_id) REFERENCES sellers(seller_id)
);
COPY order_items FROM 'E:/Data/Olist/olist_order_items_dataset.csv' HEADER CSV DELIMITER ',';
SELECT * FROM order_items LIMIT 10;
 * postgresql://postgres:***@localhost:5432/olist
Done.
Done.
112650 rows affected.
10 rows affected.
Out[13]:
order_id item_id product_id seller_id shipping_limit_date price freight_value
00010242fe8c5a6d1ba2dd792cb16214 1 4244733e06e7ecb4970a6e2683c13e61 48436dade18ac8b2bce089ec2a041202 2017-09-19 09:45:35 58.90 13.29
00018f77f2f0320c557190d7a144bdd3 1 e5f2d52b802189ee658865ca93d83a8f dd7ddc04e1b6c2c614352b383efe2d36 2017-05-03 11:05:13 239.90 19.93
000229ec398224ef6ca0657da4fc703e 1 c777355d18b72b67abbeef9df44fd0fd 5b51032eddd242adc84c38acab88f23d 2018-01-18 14:48:30 199.00 17.87
00024acbcdf0a6daa1e931b038114c75 1 7634da152a4610f1595efa32f14722fc 9d7a1d34a5052409006425275ba1c2b4 2018-08-15 10:10:18 12.99 12.79
00042b26cf59d7ce69dfabb4e55b4fd9 1 ac6c3623068f30de03045865e4e10089 df560393f3a51e74553ab94004ba5c87 2017-02-13 13:57:51 199.90 18.14
00048cc3ae777c65dbb7d2a0634bc1ea 1 ef92defde845ab8450f9d70c526ef70f 6426d21aca402a131fc0a5d0960a3c90 2017-05-23 03:55:27 21.90 12.69
00054e8431b9d7675808bcb819fb4a32 1 8d4f2bb7e93e6710a28f34fa83ee7d28 7040e82f899a04d1b434b795a43b4617 2017-12-14 12:10:31 19.90 11.85
000576fe39319847cbb9d288c5617fa6 1 557d850972a7d6f792fd18ae1400d9b6 5996cddab893a4652a15592fb58ab8db 2018-07-10 12:30:45 810.00 70.75
0005a1a1728c9d785b8e2b08b904576c 1 310ae3c140ff94b03219ad0adc3c778f a416b6a846a11724393025641d4edd5e 2018-03-26 18:31:29 145.95 11.65
0005f50442cb953dcd1d21e1fb923495 1 4535b0e1091c278dfd193e5a1d63b39f ba143b05f0110f0dc71ad71b4466ce92 2018-07-06 14:10:56 53.99 11.40

Checking schema information¶

To check whether the tables are properly created, we can query information_schema.tables for the olist schema.

In [14]:
%%sql
SELECT * FROM information_schema.tables WHERE table_schema = 'olist';
 * postgresql://postgres:***@localhost:5432/olist
9 rows affected.
Out[14]:
table_catalog table_schema table_name table_type self_referencing_column_name reference_generation user_defined_type_catalog user_defined_type_schema user_defined_type_name is_insertable_into is_typed commit_action
olist olist geolocation BASE TABLE None None None None None YES NO None
olist olist customers BASE TABLE None None None None None YES NO None
olist olist orders BASE TABLE None None None None None YES NO None
olist olist category_names_english BASE TABLE None None None None None YES NO None
olist olist order_reviews BASE TABLE None None None None None YES NO None
olist olist products BASE TABLE None None None None None YES NO None
olist olist order_payments BASE TABLE None None None None None YES NO None
olist olist order_items BASE TABLE None None None None None YES NO None
olist olist sellers BASE TABLE None None None None None YES NO None

The result shows that all the needed tables were generated properly and are ready for investigation.

Entity Relationship Diagram¶

The Entity Relationship Diagram (ERD) of a schema presents the data structure by visually showing how the tables are connected. With such an ERD, we can visually check whether the created structure replicates the original one.

postgres - olist.png

The ERD does not show connections between the table geolocation and the tables customers and sellers, due to the problem with the given data mentioned above. It also shows the additional table category_names_english, the extra dataset used to translate product category names from Brazilian Portuguese to English. The primary keys of the tables are highlighted in the diagram, and the data types are given as well.

Executive summary¶

This project establishes a data schema containing all the data in the Olist set, following the structure given by the publisher. The original data, stored in separate csv files, has been imported into separate tables in a schema with proper data types assigned to all variables. The project achieves the following:

  • Creating a schema to store data tables replicating the data structure given by the publisher of the Olist data set
  • Creating tables following a logical order so that all the connections among the tables are properly preserved
  • Importing the data simultaneously with the table creations and adjusting the data when necessary
  • Assigning suitable data types to the given data, which involve four main types: integer, numeric, text, and timestamp
  • Checking the result by using the schema information table
  • Constructing the entity relationship diagram to compare the structure of the imported and the original databases

The data is now ready to be extracted and manipulated using SQL queries. The next steps might be exploratory analysis and data visualization, so that more insights can be gained for further investigation.
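As a sketch of that next step, query results can be pulled straight into pandas with read_sql_query. An in-memory SQLite database stands in for the Postgres connection here so the example is self-contained; against the notebook's olist engine the same call pattern applies.

```python
import sqlite3

import pandas as pd

# In-memory stand-in for the olist database, with a few toy rows
conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE orders (order_id TEXT, status TEXT);
    INSERT INTO orders VALUES
        ('a1', 'delivered'), ('a2', 'canceled'), ('a3', 'delivered');
""")

# Aggregate in SQL, analyze/plot the result in pandas
summary = pd.read_sql_query(
    "SELECT status, COUNT(*) AS n FROM orders GROUP BY status ORDER BY status",
    conn,
)
print(summary)
```

Results arriving as DataFrames plug directly into the matplotlib/plotly stack imported at the top of the notebook.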

References¶

  1. Olist public data on Kaggle