本次综合实验以阿里巴巴天池的婴儿用品购买数据集为例,进行相关分析并提出建议。
数据来源说明:该数据来源于阿里天池,具体为2012年7月2日至2015年2月5日发生在淘宝、天猫交易平台上的婴幼儿商品交易数据,包含两个表格:婴儿信息表(mum_baby.csv)和交易记录表(mum_baby_trade_history.csv)。
1.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
2.
# Load the table stored in ./mum_baby.csv (path is relative to the working
# directory of the notebook).
df_gender = pd.read_csv(filepath_or_buffer='./mum_baby.csv')
# Bare expression: shows a preview when this is the last line of a notebook cell.
df_gender
3.
# Load the trade-history table stored in ./mum_baby_trade_history.csv (path is
# relative to the working directory of the notebook).
df_mums = pd.read_csv(filepath_or_buffer='./mum_baby_trade_history.csv')
# Bare expression: shows a preview when this is the last line of a notebook cell.
df_mums
4.
# Bar chart of the 20 most frequent `cat_id` values in the trade history.
#
# NOTE: despite the `_boy` suffix, no gender filter is applied in this cell;
# the counts cover every row of df_mums.

# Use the SimHei font so the Chinese labels render, and keep the minus sign
# drawable with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# value_counts() sorts by frequency, most frequent first.
data_boy = df_mums['cat_id'].value_counts()
label_boy = data_boy.index.to_list()
value_boy = data_boy.to_list()

# (cat_id, count) pairs; the first 20 are the 20 most frequent categories.
data_pair_boy = list(zip(label_boy, value_boy))
data = data_pair_boy[:20]

df = pd.DataFrame(data=data, columns=['商品二级分类', '数量'])
df.plot.bar(x='商品二级分类', y='数量', title='商品二级分类的前二十名')
plt.show()
5.
# Inner-join the trade history with the baby-information table. No `on=` key is
# given, so pandas joins on the columns the two frames have in common.
# NOTE(review): presumably that is a single user-id column - confirm, and
# consider passing `on=` explicitly.
df_mums_gender = df_mums.merge(df_gender, how='inner')
df_mums_gender
6.
# Overwrite `birthday` with the first four characters of its text form.
# (assumes the raw value is a YYYYMMDD-style number, so this is the year -
# TODO confirm against the CSV)
birthday_text = df_mums_gender['birthday'].astype(str)
df_mums_gender['birthday'] = birthday_text.str[:4]
df_mums_gender
7.
# Overwrite `day` with characters 5-6 of its text form.
# (assumes the raw value is a YYYYMMDD-style number, so this is the month -
# TODO confirm against the CSV)
day_text = df_mums_gender['day'].astype(str)
df_mums_gender['day'] = day_text.str[4:6]
df_mums_gender
8.
# Keep only the columns needed for the per-month analysis.
#
# Direct column selection is used instead of `pd.DataFrame(df, columns=[...])`:
# the constructor form silently creates all-NaN columns when a requested name
# is missing or misspelled, whereas selection raises KeyError. `.copy()` makes
# the result independent of df_mums_gender so later edits to it are safe.
df_month_goods = df_mums_gender[
    ['auction_id', 'cat_id', 'cat1', 'day', 'birthday', 'gender']
].copy()
df_month_goods
9.
# Give the sliced columns names that say what they now hold:
# `day` -> `month`, `birthday` -> `birthday_year`. The other four columns
# keep their names.
df_month_goods = df_month_goods.rename(
    columns={'day': 'month', 'birthday': 'birthday_year'}
)
df_month_goods
10.
# Index labels of the merged rows whose gender code is 0.
# NOTE(review): this notebook treats gender == 0 as "boy" - confirm against
# the dataset's data dictionary.
gender_is_zero = df_mums_gender['gender'] == 0
boy_list = df_mums_gender.loc[gender_is_zero].index.to_list()
boy_list
11.
# Rows of df_month_goods that belong to the gender == 0 group.
#
# `boy_list` holds index *labels* (it was built from `.index`), so the lookup
# must be label-based (`.loc`). The previous positional `.iloc` only gave the
# right rows while the index happened to be the default 0..n-1 range.
df_boy = df_month_goods.loc[boy_list, :]
df_boy
12.
# For each month, count how many rows fall into each `cat1` value. The result
# is a Series indexed by (month, cat1).
cat1_by_month = df_boy.groupby('month')['cat1']
df_month_onegoods = cat1_by_month.value_counts()
df_month_onegoods
13.
# Flatten the (month, cat1) index of the counts into a plain list of tuples.
counts_index = df_month_onegoods.index
df_month_onegoods_label = counts_index.to_list()
df_month_onegoods_label
14.
import pandas as pd

# Turn the (month, cat1) tuples into a two-column frame; the count column is
# attached in a later cell.
df_month_boy_one = pd.DataFrame(
    data=df_month_onegoods_label,
    columns=['month', 'boy_cat1'],
)
df_month_boy_one
15.
# The counts themselves, in the same order as the (month, cat1) index.
df_month_onegoods_value = df_month_onegoods.to_list()
df_month_onegoods_value
16.
# Attach the counts as a `numbers` column next to their (month, boy_cat1) rows.
df_month_boy_one = df_month_boy_one.assign(numbers=df_month_onegoods_value)
df_month_boy_one
17.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Grouped bar chart: one group of bars per month, one bar per `boy_cat1` value.

# Use the SimHei font so the Chinese labels render, and keep the minus sign
# drawable with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Reshape to months (rows) x categories (columns). pivot_table averages
# duplicate cells by default; each (month, boy_cat1) pair appears once in
# df_month_boy_one, so the values are the counts themselves.
dfp = df_month_boy_one.pivot_table(
    values='numbers',
    index='month',
    columns='boy_cat1',
)

ax = dfp.plot.bar(figsize=(15, 10), rot=0)
ax.set_xlabel("月份")   # horizontal-axis label
ax.set_ylabel("数量")   # vertical-axis label
ax.legend(fontsize=10)
ax.set_title("男婴在不同月份喜欢购买的一级商品")
plt.show()
18.总代码·一
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
# Consolidated script, part one: load both CSV tables and draw a bar chart of
# the 20 most frequent `cat_id` values in the trade history.
df_gender = pd.read_csv(filepath_or_buffer='./mum_baby.csv')
df_mums = pd.read_csv(filepath_or_buffer='./mum_baby_trade_history.csv')

# Use the SimHei font so the Chinese labels render, and keep the minus sign
# drawable with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# NOTE: despite the `_boy` suffix, no gender filter is applied here; the
# counts cover every row of df_mums. value_counts() sorts most frequent first.
data_boy = df_mums['cat_id'].value_counts()
label_boy = data_boy.index.to_list()
value_boy = data_boy.to_list()

# (cat_id, count) pairs; the first 20 are the 20 most frequent categories.
data_pair_boy = list(zip(label_boy, value_boy))
data = data_pair_boy[:20]

df = pd.DataFrame(data=data, columns=['商品二级分类', '数量'])
df.plot.bar(x='商品二级分类', y='数量', title='商品二级分类的前二十名')
plt.show()
19.总代码·二
# Consolidated script, part two: per-month counts of `cat1` purchases for the
# gender == 0 group, drawn as a grouped bar chart.
# Expects `df_mums` and `df_gender` to be loaded already (part one).
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use the SimHei font so the Chinese labels render, and keep the minus sign
# drawable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Inner join; no `on=` is given, so pandas joins on the columns the two frames
# have in common.
# NOTE(review): presumably a single user-id column - confirm.
df_mums_gender = pd.merge(df_mums, df_gender, how='inner')

# Reduce both date-like columns to short text fields: first four characters of
# `birthday`, characters 5-6 of `day`.
# (assumes YYYYMMDD-style values, i.e. year and month - TODO confirm)
df_mums_gender['birthday'] = df_mums_gender['birthday'].astype(str).str.slice(0, 4)
df_mums_gender['day'] = df_mums_gender['day'].astype(str).str.slice(4, 6)

# Select by name (raises KeyError on a missing column instead of silently
# creating an all-NaN one) and rename by name rather than by position.
df_month_goods = df_mums_gender[
    ['auction_id', 'cat_id', 'cat1', 'day', 'birthday', 'gender']
].rename(columns={'day': 'month', 'birthday': 'birthday_year'})

# NOTE(review): gender == 0 is treated as "boy" here - confirm against the
# dataset's data dictionary.
boy_list = df_mums_gender[df_mums_gender['gender'] == 0].index.tolist()
# boy_list holds index labels, so use label-based `.loc`; positional `.iloc`
# was only correct while the index was the default 0..n-1 range.
df_boy = df_month_goods.loc[boy_list, :]

# Count rows per (month, cat1), then flatten the result into a frame with the
# columns month / boy_cat1 / numbers.
df_month_onegoods = df_boy.groupby('month')['cat1'].value_counts()
df_month_boy_one = (
    df_month_onegoods
    .rename_axis(['month', 'boy_cat1'])
    .reset_index(name='numbers')
)

# Reshape to months (rows) x categories (columns). pivot_table averages
# duplicate cells by default; each (month, boy_cat1) pair appears once, so the
# values are the counts themselves.
dfp = df_month_boy_one.pivot_table(index='month', columns='boy_cat1', values='numbers')
dfp.plot(kind='bar', figsize=(15, 10), rot=0)
plt.xlabel("月份")   # horizontal-axis label
plt.ylabel("数量")   # vertical-axis label
plt.legend(fontsize=10)
plt.title("男婴在不同月份喜欢购买的一级商品")
plt.show()