【SXC_231228】使用Sklearn决策树分析商品评价有用性

发布时间:2023-12-28 付费文章:9.9元

 

import random
import string
from datetime import datetime

def generate_random_string(length=3):
    """Return ``length`` randomly chosen uppercase ASCII letters as one string.

    Each letter is drawn independently with ``random.choice``, so repeated
    letters are possible. A ``length`` of zero or less gives an empty string.
    """
    alphabet = string.ascii_uppercase
    letters = []
    for _ in range(length):
        letters.append(random.choice(alphabet))
    return ''.join(letters)

def generate_timestamped_string(separator='_'):
    """Return a tag made of three random uppercase letters, ``separator``, and today's date.

    The date is formatted as ``yymmdd`` from ``datetime.now()``; the random
    letters come from ``generate_random_string``.
    """
    # Date only; extend the format with %H%M%S to include the time of day.
    date_stamp = datetime.now().strftime('%y%m%d')
    prefix = generate_random_string(length=3)
    return prefix + separator + date_stamp

# Generate one tag (random letters + separator + date) and print it wrapped in 【】.
timestamped_string = generate_timestamped_string()
print('【{0}】'.format(timestamped_string))

【Talk is cheap】

import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Plot configuration so Chinese labels render correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # display Chinese characters
    'axes.unicode_minus': False,    # display minus signs
})
# Suppress warning output.
warnings.filterwarnings("ignore")
%matplotlib inline

df.columns
Index(['Product_ID', '产品名称', '有用人数', 'Unnamed: 3', '权威性', 'Unnamed: 5', '评论者',
       '评论者打分', 'Unnamed: 8', 'Unnamed: 9', '一致性', '客观性', '标题', '发布时间',
       'Unnamed: 14', '发布时间距今', '时效性', '价格', '具体性', '产品评论数量', 'Unnamed: 20',
       '产品平均得分', '评价正文', '评论字数', '完整性1', '评价正文.1', '情感性', 'Unnamed: 27',
       '发布时长', '完整性2', '分母', '有用性', 'Unnamed: 32', 'Unnamed: 33', '800-1200',
       'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38'],
      dtype='object')

...


from sklearn import tree
# Fit a shallow decision tree (Gini impurity, depth capped at 3) on
# train_x / train_y. random_state is pinned because scikit-learn randomly
# permutes the candidate features at every split, so equally good splits
# could otherwise be picked differently from one run to the next.
tree_clf = tree.DecisionTreeClassifier(criterion="gini", max_depth=3, random_state=42)
tree_clf.fit(train_x, train_y)


from sklearn.metrics import classification_report

# Predict labels for the training data with the fitted model.
# NOTE(review): the report below is therefore computed on the data the model
# was trained on; it does not show how the model performs on unseen data.
train_y_pred = tree_clf.predict(X=train_x)

# Build the classification report and print it.
report = classification_report(y_true=train_y, y_pred=train_y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      0.96      0.98    117108
           1       0.85      1.00      0.92     23569

    accuracy                           0.97    140677
   macro avg       0.92      0.98      0.95    140677
weighted avg       0.97      0.97      0.97    140677
 

注1:支付宝扫下图绿码打赏后,再点击 直接获取↑

注2:如忘记保存或后续查看,可凭订单号 手动获取