## Importing libraries for the work
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot
import seaborn as sns


shipment_data = pd.read_csv(r"C:\Users\User Pc\Desktop\Datasets\Shipment data Python\Shipment details data.csv",
                           encoding='cp1252')
shipment_data.head(2)


## Identifying the number of columns and columns names
print(f"The no of columns in the dataset is {len(shipment_data.columns)}"),shipment_data.columns

The no of columns in the dataset is 53

(None,
 Index(['Type', 'Days for shipping (real)', 'Days for shipment (scheduled)',
        'Benefit per order', 'Sales per customer', 'Delivery Status',
        'Late_delivery_risk', 'Category Id', 'Category Name', 'Customer City',
        'Customer Country', 'Customer Email', 'Customer Fname', 'Customer Id',
        'Customer Lname', 'Customer Password', 'Customer Segment',
        'Customer State', 'Customer Street', 'Customer Zipcode',
        'Department Id', 'Department Name', 'Latitude', 'Longitude', 'Market',
        'Order City', 'Order Country', 'Order Customer Id',
        'order date (DateOrders)', 'Order Id', 'Order Item Cardprod Id',
        'Order Item Discount', 'Order Item Discount Rate', 'Order Item Id',
        'Order Item Product Price', 'Order Item Profit Ratio',
        'Order Item Quantity', 'Sales', 'Order Item Total',
        'Order Profit Per Order', 'Order Region', 'Order State', 'Order Status',
        'Order Zipcode', 'Product Card Id', 'Product Category Id',
        'Product Description', 'Product Image', 'Product Name', 'Product Price',
        'Product Status', 'shipping date (DateOrders)', 'Shipping Mode'],
       dtype='object'))


shipment_data.drop(columns=['Late_delivery_risk', 'Category Id', 'Customer Email', 'Customer Fname', 'Customer City',
                        'Customer Lname', 'Customer Password', 'Customer State', 'Customer Street', 'Customer Zipcode',
                        'Department Id', 'Latitude', 'Longitude', 'Order Item Cardprod Id', 'Order Item Profit Ratio', 
                         'Sales', 'Order Item Total', 'Order Profit Per Order', 
                    'Order Status', 'Product Price', 'Product Status'], inplace=True)


## Dropping more unnecessary columns in the dataset
shipment_data.drop(columns=['Customer Segment', 'Order Item Product Price', 'Order State', 
                                'Order Region', 'Order Zipcode', 'Product Card Id', 'Product Category Id',
                        'Product Description', 'Product Image'], inplace=True)


shipment_data.head(2)


shipment_data.columns

Index(['Type', 'Days for shipping (real)', 'Days for shipment (scheduled)',
       'Benefit per order', 'Sales per customer', 'Delivery Status',
       'Category Name', 'Customer Country', 'Customer Id', 'Department Name',
       'Market', 'Order City', 'Order Country', 'Order Customer Id',
       'order date (DateOrders)', 'Order Id', 'Order Item Discount',
       'Order Item Discount Rate', 'Order Item Id', 'Order Item Quantity',
       'Product Name', 'shipping date (DateOrders)', 'Shipping Mode'],
      dtype='object')


shipment_data.describe().transpose()


## Vital info about Real shipment time(day) vs scheduled shipment time(day) 
## By comparing their respective STD, Mean and Max.


real_shipment_days = shipment_data['Days for shipping (real)'].value_counts()
real_shipment_days.plot(kind='pie', title='No of times each shipment duration(days) reoccurred', figsize=(6,6))

<Axes: title={'center': 'No of times each shipment duration(days) reoccurred'}, ylabel='count'>


shipment_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180519 entries, 0 to 180518
Data columns (total 23 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   Type                           180519 non-null  object 
 1   Days for shipping (real)       180519 non-null  int64  
 2   Days for shipment (scheduled)  180519 non-null  int64  
 3   Benefit per order              180519 non-null  float64
 4   Sales per customer             180519 non-null  float64
 5   Delivery Status                180519 non-null  object 
 6   Category Name                  180519 non-null  object 
 7   Customer Country               180519 non-null  object 
 8   Customer Id                    180519 non-null  int64  
 9   Department Name                180519 non-null  object 
 10  Market                         180519 non-null  object 
 11  Order City                     180519 non-null  object 
 12  Order Country                  180519 non-null  object 
 13  Order Customer Id              180519 non-null  int64  
 14  order date (DateOrders)        180519 non-null  object 
 15  Order Id                       180519 non-null  int64  
 16  Order Item Discount            180519 non-null  float64
 17  Order Item Discount Rate       180519 non-null  float64
 18  Order Item Id                  180519 non-null  int64  
 19  Order Item Quantity            180519 non-null  int64  
 20  Product Name                   180519 non-null  object 
 21  shipping date (DateOrders)     180519 non-null  object 
 22  Shipping Mode                  180519 non-null  object 
dtypes: float64(4), int64(7), object(12)
memory usage: 31.7+ MB


shipment_data[ 'order date (DateOrders)'] = pd.to_datetime(shipment_data[ 'order date (DateOrders)'])


## Coverting the Orders date data to Year
shipment_data['Year'] = shipment_data[ 'order date (DateOrders)'].dt.year


customers_orders_per_year = shipment_data.groupby('Year')[['Customer Id', 'Order Id']].nunique()
customers_orders_per_year


customers_orders_per_year.plot(kind='barh', rot=0, grid=True, stacked=True, color=['gold', 'red'])
## The viz shows the company had a major drop off in customers in the year 2018

<Axes: ylabel='Year'>


sales_qty_profit_per_year = shipment_data.groupby('Year')[['Benefit per order', 
                        'Sales per customer', 'Order Item Quantity']].sum()
sales_qty_profit_per_year


sales_qty_profit_per_year.plot.bar(rot=0, grid=True, color=['darkred', 'darkblue', 'darkgreen'])
## The viz shows;
## 1.A relatively similarity in sales, order Qty, and profit per order from 2015 to 2017 
##                                           (though sales in 2017 was a slight drop in comaprison to 2015 and 2016)
## 3. the company had a major drop off in sales,order Qty and profit per order in the year 2018

<Axes: xlabel='Year'>


products_by_orders = shipment_data.groupby('Product Name', 
                    as_index=False)[['Order Id']].nunique().sort_values(by='Order Id', ascending=False)
products_by_orders.head(3)


products_by_orders[:10].plot.barh(x='Product Name', y='Order Id', color= 'indigo')

<Axes: ylabel='Product Name'>


sales_qty_profit_by_product = shipment_data.groupby('Product Name', as_index=False)[['Benefit per order', 
                        'Sales per customer']].sum().sort_values(by='Sales per customer', ascending=False)
sales_qty_profit_by_product.head(5)


sales_qty_profit_by_product[:5].plot(kind='barh', x='Product Name', rot=0)

<Axes: ylabel='Product Name'>


customers_orders_by_shipping_mode = shipment_data.groupby('Shipping Mode', as_index=False)[['Customer Id', 
                                    'Order Id']].nunique().sort_values(by=['Customer Id','Order Id'], ascending=False)
customers_orders_by_shipping_mode


customers_orders_by_shipping_mode.plot(x='Shipping Mode', kind='bar', rot=20, color=['brown', 'magenta'], stacked=True)
## The viz shows the shipping mode with the highest orders by customer demands is the Standard Class shipping mode

<Axes: xlabel='Shipping Mode'>


sales_profit_by_shipping_mode = shipment_data.groupby('Shipping Mode', as_index=False)[['Benefit per order', 
                        'Sales per customer']].sum().sort_values(by=['Benefit per order', 
                                                                     'Sales per customer'], ascending=False)
sales_profit_by_shipping_mode


sales_profit_by_shipping_mode.plot(x='Shipping Mode', kind='barh', rot=0, color=['black', 'orange'], stacked=True)

<Axes: ylabel='Shipping Mode'>


customers_orders_by_shipping_mode = shipment_data.groupby('Shipping Mode', as_index=False)[['Customer Id', 
                                    'Order Id']].nunique().sort_values(by=['Customer Id','Order Id'], ascending=False)
customers_orders_by_shipping_mode


country_by_customers_orders = shipment_data.groupby('Customer Country', as_index=False)[['Customer Id', 
                                    'Order Id']].nunique().sort_values(by=['Customer Id','Order Id'], ascending=False)
country_by_customers_orders


country_by_customers_orders.plot.bar(x='Customer Country',rot=0, grid=True, 
                                     color=['darkred', 'green'])
## The viz shows that EE. UU. countries are where most of the companies clients live(Over 60%)
## and the same with the orders, where orders are over 60% as well from EE. UU. countries

<Axes: xlabel='Customer Country'>


destination_country_of_orders = shipment_data.groupby('Order Country', 
                            as_index=False)['Order Id'].nunique().sort_values(by='Order Id', ascending=False)
destination_country_of_orders.head(10)


destination_country_of_orders[:10].plot.barh(x='Order Country',rot=0, color='grey')

<Axes: ylabel='Order Country'>


## comparing the highest shipment delivery mode used the most by the orders
delivery_status_by_orders = shipment_data.groupby('Delivery Status', 
                            as_index=False)['Order Id'].nunique().sort_values(by='Order Id', ascending=False)

delivery_status_by_orders


delivery_status_by_orders.plot.bar(x='Delivery Status',rot=20, color='darkgreen')
## The viz below shows that more than 50% of the orders delivered were Late deliveries

<Axes: xlabel='Delivery Status'>

	Type	Days for shipping (real)	Days for shipment (scheduled)	Benefit per order	Sales per customer	Delivery Status	Late_delivery_risk	Category Id	Category Name	Customer City	...	Order Zipcode	Product Card Id	Product Category Id	Product Description	Product Image	Product Name	Product Price	Product Status	shipping date (DateOrders)	Shipping Mode
0	DEBIT	3	4	91.250000	314.640015	Advance shipping	0	73	Sporting Goods	Caguas	...	NaN	1360	73	NaN	http://images.acmesports.sports/Smart+watch	Smart watch	327.75	0	2/3/2018 22:56	Standard Class
1	TRANSFER	5	4	-249.089996	311.359985	Late delivery	1	73	Sporting Goods	Caguas	...	NaN	1360	73	NaN	http://images.acmesports.sports/Smart+watch	Smart watch	327.75	0	1/18/2018 12:27	Standard Class

	Type	Days for shipping (real)	Days for shipment (scheduled)	Benefit per order	Sales per customer	Delivery Status	Category Name	Customer Country	Customer Id	Department Name	...	Order Customer Id	order date (DateOrders)	Order Id	Order Item Discount	Order Item Discount Rate	Order Item Id	Order Item Quantity	Product Name	shipping date (DateOrders)	Shipping Mode
0	DEBIT	3	4	91.250000	314.640015	Advance shipping	Sporting Goods	Puerto Rico	20755	Fitness	...	20755	1/31/2018 22:56	77202	13.110000	0.04	180517	1	Smart watch	2/3/2018 22:56	Standard Class
1	TRANSFER	5	4	-249.089996	311.359985	Late delivery	Sporting Goods	Puerto Rico	19492	Fitness	...	19492	1/13/2018 12:27	75939	16.389999	0.05	179254	1	Smart watch	1/18/2018 12:27	Standard Class

	count	mean	std	min	25%	50%	75%	max
Days for shipping (real)	180519.0	3.497654	1.623722	0.00000	2.000000	3.000000	5.000000	6.000000
Days for shipment (scheduled)	180519.0	2.931847	1.374449	0.00000	2.000000	4.000000	4.000000	4.000000
Benefit per order	180519.0	21.974989	104.433526	-4274.97998	7.000000	31.520000	64.800003	911.799988
Sales per customer	180519.0	183.107609	120.043670	7.49000	104.379997	163.990005	247.399994	1939.989990
Customer Id	180519.0	6691.379495	4162.918106	1.00000	3258.500000	6457.000000	9779.000000	20757.000000
Order Customer Id	180519.0	6691.379495	4162.918106	1.00000	3258.500000	6457.000000	9779.000000	20757.000000
Order Id	180519.0	36221.894903	21045.379569	1.00000	18057.000000	36140.000000	54144.000000	77204.000000
Order Item Discount	180519.0	20.664741	21.800901	0.00000	5.400000	14.000000	29.990000	500.000000
Order Item Discount Rate	180519.0	0.101668	0.070415	0.00000	0.040000	0.100000	0.160000	0.250000
Order Item Id	180519.0	90260.000000	52111.490959	1.00000	45130.500000	90260.000000	135389.500000	180519.000000
Order Item Quantity	180519.0	2.127638	1.453451	1.00000	1.000000	1.000000	3.000000	5.000000

	Customer Id	Order Id
Year
2015	10131	20904
2016	10222	20859
2017	15156	21866
2018	2123	2123

	Benefit per order	Sales per customer	Order Item Quantity
Year
2015	1.318857e+06	1.108954e+07	138480
2016	1.310119e+06	1.105600e+07	137352
2017	1.304085e+06	1.061091e+07	106124
2018	3.384189e+04	2.979521e+05	2123

A descriptive analysis of shipment data¶

Importing the dataframe into Pandas¶

Data cleaning process begins¶

Dropping all unnecessary columns in the dataset so as to prioritize necessary columns to be used in the analysis process¶

A general overview of the numerical data columns of the shipment dataframe¶

The numerical description does provide alot of vital info of the overall performance of the shipment company¶

Viz of real shipment days¶

General shipment data info¶

To analyze the no of customers per year,¶

I'll convert the Orderdate object into a datetime datatype¶

Comparing the Unique no of shipment customers and orders per year¶

Viz of shipment customers and orders per year¶

Comparing Quantity of goods ordered, sales, and Benefit per order over the Years of shipment¶

Viz of shipment customers and orders per year¶

Sorting Products by its number of orders¶

Viz of Top 10 products by its orders¶

Comparing Products by their respective sales and Benefit¶

Viz of Top 5 products by sales and profit per order¶

Total no of customers and order by shipping mode style¶

Viz of shipment mode orshpping style with regards to high customer demand and orders¶

In terms of profit and sales for shipping mode, again the Standard Class rises to the top¶

The viz below shows Standard Class to be a mile above all in terms of its sales rate and profitability¶

Total no of customers and order by shipping mode style¶

The country with the highest customer base and orders-origin¶

Viz of shipment customers and orders per year¶

Destination Top 10 country for shipment orders¶

Viz of the destination Top 10 country for shipment orders¶

The shipment delivery status by Orders¶

	Product Name	Order Id
71	Perfect Fitness Perfect Rip Deck	20359
56	Nike Men's CJ Elite 2 TD Football Cleat	18783
59	Nike Men's Dri-FIT Victory Golf Polo	17869

	Product Name	Benefit per order	Sales per customer
24	Field & Stream Sportsman 16 Gun Fire Safe	756220.767190	6.226935e+06
71	Perfect Fitness Perfect Rip Deck	493828.299782	3.973180e+06
21	Diamondback Women's Serene Classic Comfort Bi	427455.568106	3.700784e+06
61	Nike Men's Free 5.0+ Running Shoe	379915.818503	3.295693e+06
59	Nike Men's Dri-FIT Victory Golf Polo	350421.029567	2.828708e+06

	Shipping Mode	Customer Id	Order Id
3	Standard Class	16578	39324
2	Second Class	8969	12778
0	First Class	7594	10079
1	Same Day	3208	3571

	Shipping Mode	Benefit per order	Sales per customer
3	Standard Class	2.370454e+06	1.979222e+07
2	Second Class	7.503082e+05	6.416529e+06
0	First Class	6.431219e+05	5.100451e+06
1	Same Day	2.030184e+05	1.745202e+06

	Order Country	Order Id
48	Estados Unidos	8270
53	Francia	4866
102	México	4395
8	Australia	3798
2	Alemania	3518
120	Reino Unido	2785
20	Brasil	2650
31	China	2616
69	India	2152
70	Indonesia	1920

	Delivery Status	Order Id
1	Late delivery	36048
0	Advance shipping	15127
3	Shipping on time	11722
2	Shipping canceled	2855