Bootstrap

阿里云天池大众点评3万条评论的数据分析

描述

广州八大热门糖水店的评论(顾客id、评论时间、评分、评论内容、口味、环境、服务、店铺ID)

任务1:对顾客的评论内容绘制出词云图

# Draw a word cloud from the customer comments.
# Rows with a missing comment are skipped; the remaining comments are joined
# with single spaces into one text that WordCloud tokenises itself.
comment_missing = df['cus_comment'].isna()
list1 = df['cus_comment'][~comment_missing].values.tolist()
ComName_str = ' '.join(list1)
# Words to leave out of the cloud (handed to WordCloud unchanged).
stopwords = [
    ',', '了', '吧', '?', '。', '啊', '哈哈', '呵呵', '另外', '当然',
    '但是', '的', '不过', '感觉', '就是', '而且', '也是', '然后', '因为', '啦',
    '也是', '虽然', '都', '是', '点', '我', '这里', '没有', '其实', '就',
    '有', '但', '所以', '觉得', '上下', '时候', '九', '在',
]
wc = WordCloud(
    scale=3,
    background_color="white",
    max_words=1000,
    width=800,
    height=500,
    font_path='msyh.ttc',  # font file used to render the Chinese glyphs
    stopwords=stopwords,
)
wc.generate_from_text(ComName_str)
# Save the rendered cloud to disk, then return the image as the cell output.
wc.to_file('评论云图.png')
wc.to_image()

任务2:绘制(口味、环境、服务)的柱形图

# Grouped bar chart comparing how taste (kouwei), service (fuwu) and
# environment (huanjing) were rated.
my_font = font_manager.FontProperties()

# Rating categories in display order; they double as the x-axis tick labels.
labels = ['非常好', '很好', '好', '一般', '差', '无']


def _count_ratings(column):
    """Return ``{label: count}`` for ``df[column]``, ordered like ``labels``.

    Missing values and values that are not one of ``labels`` are ignored
    instead of raising ``KeyError``; labels that never occur get a count of 0.
    """
    counts = df[column].value_counts().reindex(labels, fill_value=0)
    return {label: int(count) for label, count in counts.items()}


dict1 = _count_ratings('kouwei')
dict2 = _count_ratings('fuwu')
dict3 = _count_ratings('huanjing')

b_1 = list(dict1.values())
b_2 = list(dict2.values())
b_3 = list(dict3.values())

bar_width = 0.2

# x positions of the three bars inside each rating group; every series is
# shifted by one bar width relative to the previous one.
bar_1 = list(range(len(labels)))
bar_2 = [i + bar_width for i in bar_1]
bar_3 = [i + bar_width for i in bar_2]

# Figure size and resolution.
plt.figure(figsize=(20, 8), dpi=80)

# Draw each series and write its count above every bar. The text positions
# reuse the bar positions so they stay aligned if bar_width or labels change.
for positions, counts, series_name in (
    (bar_1, b_1, '口味'),
    (bar_2, b_2, '服务'),
    (bar_3, b_3, '环境'),
):
    plt.bar(positions, counts, width=bar_width, label=series_name)
    for a, b in zip(positions, counts):
        plt.text(a, b + 1, '%d' % b, ha='center', va='bottom')

# Title.
plt.title('口味服务环境图', fontproperties=my_font, size=20)
# Axis labels.
plt.xlabel('评价', fontproperties=my_font)
plt.ylabel('数量', fontproperties=my_font)
# Ticks sit under the middle bar of each group.
plt.xticks(bar_2, labels, fontproperties=my_font, size=15)
plt.legend(prop=my_font)
# Show the figure.
plt.show()

任务3:绘制评分的饼图

# Pie chart of the share of each star rating.
# Count how often every star value occurs, skipping missing ratings.
dict4 = {}
stars = df['stars']
for value in stars[~stars.isna()]:
    dict4[value] = dict4.get(value, 0) + 1
print(dict4)

# Order the wedges from the highest to the lowest star value so that every
# label is paired with its own count. (A set of labels has no defined order,
# and dict4 is in first-appearance order, so pairing them directly mislabels
# the wedges.)
star_values = sorted(dict4, reverse=True)
Count = [dict4[value] for value in star_values]
star_names = ['五星', '四星', '三星', '二星', '一星']
if len(star_values) == len(star_names):
    # NOTE(review): assumes the five distinct values rank five-star down to
    # one-star, largest first -- confirm against the encoding of df['stars'].
    Star = star_names
else:
    # Unexpected number of distinct ratings: label wedges with the raw values
    # rather than failing on a label/count length mismatch.
    Star = [str(value) for value in star_values]
# Pull every wedge out slightly; one offset per wedge.
explode = [0.01] * len(Count)
plt.figure(figsize=(6, 6))
plt.pie(Count, explode=explode, labels=Star, autopct='%1.2f%%')
plt.title('点评各星级占比')
plt.show()

任务4:统计八家店铺评论数分布

# Distribution of the number of comments across the shops.
# Only rows that have a comment text are counted.
dict5 = {}
has_comment = ~df['cus_comment'].isna()
for shop in df['shopID'][has_comment]:
    dict5[shop] = dict5.get(shop, 0) + 1
# Wedges and labels both follow the first-appearance order of the shop IDs.
Shop = list(dict5)
Count = [dict5[shop] for shop in Shop]
plt.figure(figsize=(8, 8))
plt.pie(Count, labels=Shop, autopct='%1.2f%%')
plt.title('店铺评论数分布')
plt.legend(loc='upper right', title='店铺ID')  # legend in the upper-right corner
plt.show()
# Number of rows per value of the month column, keyed in ascending month order.
# NOTE(review): dict6 is not used by the shop pie chart above; it looks like
# preparation for the monthly trend plot -- confirm whether this cell is needed.
dict6 = {}
for month in df['month']:
    dict6[month] = dict6.get(month, 0) + 1
dict6 = {month: dict6[month] for month in sorted(dict6)}

任务5:店铺月平均评论数变化趋势

# Trend of the number of comments per month.
# value_counts() tallies the rows per month (missing months are dropped);
# sorting the (month, count) pairs puts the months in ascending order.
dict6 = dict(sorted(df['month'].value_counts().items()))

x_axis_data = list(dict6.keys())  # months
y_axis_data = list(dict6.values())  # number of comments in each month

# 'bo--' = blue dashed line with filled-circle markers. The label gives
# plt.legend() below an entry to show; without it the legend is empty.
plt.plot(x_axis_data, y_axis_data, 'bo--', alpha=0.5, linewidth=1,
         label='comment_number')

# Write the count just above every data point.
for x, y in zip(x_axis_data, y_axis_data):
    plt.text(x, y + 0.3, '%d' % y, ha='center', va='bottom', fontsize=7.5)

plt.legend()
plt.xlabel('month')
plt.ylabel('comment_number')
# NOTE(review): the title says "monthly average", but the plotted values are
# total row counts per month over all shops -- confirm which one is intended.
plt.title('月平均数随时间的变化')
plt.show()

任务6:一周中的每一天的评论数随时间的分布

#一周中的每一天的评论数随时间的分布
def tongji(index):
    """Count comments per hour of the day for one weekday.

    Reads the module-level DataFrame ``df`` (columns ``weekday`` and ``hour``).

    Args:
        index: Value of the ``weekday`` column that selects the rows to count.

    Returns:
        A list of 24 integers; for hours 0-23, element ``h`` is the number of
        selected rows whose ``hour`` equals ``h``.
    """
    on_that_day = df['weekday'] == index
    hours = df['hour'][on_that_day].values.tolist()
    per_hour = [0] * 24
    for hour in hours:
        per_hour[hour] += 1
    return per_hour

# Hourly comment counts for each weekday value 0-6.
(
    y_axis_data0,
    y_axis_data1,
    y_axis_data2,
    y_axis_data3,
    y_axis_data4,
    y_axis_data5,
    y_axis_data6,
) = [tongji(day) for day in range(7)]
x_axis_data = list(range(24))  # hours of the day

# One line per weekday: (series, matplotlib format string, legend label).
# NOTE(review): weekday 0 is labelled Sunday here; pandas' dt.weekday uses
# 0 = Monday -- confirm how the weekday column was encoded.
weekday_lines = [
    (y_axis_data0, 'ko-', 'Sunday'),
    (y_axis_data1, 'go-', 'Monday'),
    (y_axis_data2, 'bo-', 'Tuesday'),
    (y_axis_data3, 'yo-', 'Wednesday'),
    (y_axis_data4, 'ro-', 'Thursday'),
    (y_axis_data5, 'mo-', 'Friday'),
    (y_axis_data6, 'co-', 'Saturday'),
]
for counts, line_format, day_name in weekday_lines:
    plt.plot(x_axis_data, counts, line_format, alpha=0.5, linewidth=1, label=day_name)

plt.legend()
plt.xlabel('time')
plt.ylabel('number')

plt.show()

;