zjrwtx/gist:fcd1f56bbe64a9967d04da3476be5753

## gistfile1.txt
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
import matplotlib.pyplot as plt

# Load the survey responses from the Excel workbook
data = pd.read_excel("../检验推荐系统调查数据.xlsx")

# Mean of every numeric column in the sheet
average_values = data.mean(numeric_only=True)

# Score columns shown in the chart, in display order
columns = ["推荐准确性", "推荐相关性", "操作便捷性", "生成报告时间", "数据隐私保护", "系统可接受性"]

# Mean score of each plotted column
values = average_values[columns]

# Use the SimHei font family for all figure text
plt.rcParams['font.family'] = ['SimHei']

# Narrow figure with slim bars (bar width 0.3)
plt.figure(figsize=(3.5, 4))
plt.bar(values.index, values.values, color='skyblue', width=0.3)

# Tick labels: x at size 10 and rotated 45 degrees, y at size 12
plt.xticks(rotation=45, fontsize=10)
plt.yticks(fontsize=12)

# Axis labels (size 10) and title (size 12)
plt.xlabel('指标', fontsize=10)
plt.ylabel('平均分', fontsize=10)
plt.title('推荐系统各项指标的平均分', fontsize=12)

# Fixed y-range of 0-10
plt.ylim(0, 10)

# Print each mean (rounded to 2 decimals, size 10) just above its bar
for i, v in enumerate(values.values):
    plt.text(i, v + 0.1, round(v, 2), ha='center', fontsize=10)

# Apply the layout BEFORE exporting; previously it ran after savefig and
# therefore only affected the on-screen figure, not the saved files.
plt.tight_layout()

# Export as PNG and TIFF
plt.savefig('recommendation_system_average_scores.png', format='png', bbox_inches='tight')
plt.savefig('recommendation_system_average_scores.tiff', format='tiff', bbox_inches='tight')

# Display the plot
plt.show()


# In[3]:


import seaborn as sns

# Pairwise correlation matrix of the score columns
correlation_matrix = data[columns].corr()

# Set up the matplotlib figure
plt.figure(figsize=(10, 8))

# Annotated heatmap (2-decimal cell labels) with a labelled colour bar
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    annot_kws={"size": 22},
    cbar=True,
    cbar_kws={'label': 'Correlation Coefficient'},
)

# Title and tick labels
plt.title('推荐系统各项指标的相关性分析', fontsize=20)
plt.xticks(rotation=45, ha="right", fontsize=16)
plt.yticks(rotation=0, ha="right", fontsize=16)

# Apply the layout BEFORE exporting; previously it ran after savefig and
# therefore only affected the on-screen figure, not the saved files.
plt.tight_layout()

# Export as PNG and TIFF
plt.savefig('Heat map correlation analysis.png', format='png', bbox_inches='tight')
plt.savefig('Heat map correlation analysis.tiff', format='tiff', bbox_inches='tight')

# Show the plot
plt.show()


# In[ ]:


# In[7]:


import pandas as pd
from scipy.stats import pearsonr

# 1. Load the data
file_path = '../检验推荐系统调查数据.xlsx'  # path to the survey workbook
data = pd.read_excel(file_path)

# 2. Columns included in the correlation analysis
columns = ['推荐准确性', '推荐相关性', '操作便捷性', '生成报告时间', '数据隐私保护', '系统可接受性']

# 3. Handle missing values
# Count missing entries per score column
missing_values = data[columns].isnull().sum()
# Fill any gaps with the mean of the corresponding column
if missing_values.any():
    data[columns] = data[columns].fillna(data[columns].mean())

# 4. p-value of the Pearson correlation for every unordered pair of columns.
#    Slicing from i + 1 visits each pair once (upper triangle only).
p_values = {}
for i, col1 in enumerate(columns):
    for col2 in columns[i + 1:]:
        _, p_value = pearsonr(data[col1], data[col2])
        p_values[(col1, col2)] = p_value

# 5. Report the p-values. Three significant digits are used instead of two
#    fixed decimals, which printed every p < 0.005 as "0.00".
for (col1, col2), p_value in p_values.items():
    print(f"p值 between {col1} and {col2}: {p_value:.3g}")


# In[6]:


import pandas as pd
from scipy.stats import kruskal
from scipy.stats import pearsonr

# Location of the survey workbook
file_path = '../检验推荐系统调查数据.xlsx'

# Read the survey responses
data = pd.read_excel(file_path)

# Score columns used throughout the statistical analysis
quantitative_columns = [
    '推荐准确性',
    '推荐相关性',
    '操作便捷性',
    '生成报告时间',
    '数据隐私保护',
    '系统可接受性',
]

# Work on the score columns only
score_frame = data[quantitative_columns]

# Summary statistics of every score column
descriptive_stats = score_frame.describe()
print("描述性统计信息:\n", descriptive_stats)

# Pairwise correlation matrix of the score columns
correlation_matrix = score_frame.corr()
print("\n相关性矩阵:\n", correlation_matrix)

# Overall satisfaction = row-wise mean of the six scores
data['overall_satisfaction'] = score_frame.mean(axis=1)

# 检查异常值并删除它们
def find_and_remove_outliers(data, columns):
    """Return a copy of ``data`` without rows that are IQR outliers.

    A row is removed when, in at least one of ``columns``, its value lies
    below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` of that column.

    Args:
        data: DataFrame holding the columns to inspect.
        columns: Iterable of column names checked for outliers.

    Returns:
        A new DataFrame with the outlier rows dropped; ``data`` itself is
        not modified. With no columns to check, all rows are kept.
    """
    # Row-aligned flag: True once a row is an outlier in any checked column.
    # A mask is used instead of concat(...).drop_duplicates().index because
    # drop_duplicates compares VALUES, so a second row carrying the same
    # outlier value used to be silently kept.
    outlier_mask = pd.Series(False, index=data.index)
    for col in columns:
        q25 = data[col].quantile(0.25)
        q75 = data[col].quantile(0.75)
        iqr = q75 - q25
        lower_bound = q25 - 1.5 * iqr
        upper_bound = q75 + 1.5 * iqr
        outlier_mask |= (data[col] < lower_bound) | (data[col] > upper_bound)
    return data.loc[~outlier_mask]

# Survey rows remaining after IQR-based outlier removal
data_cleaned = find_and_remove_outliers(data, quantitative_columns)

# Kruskal-Wallis H test across the six score columns (one sample per column,
# passed in the order of quantitative_columns)
kruskal_results_cleaned = kruskal(
    *(data_cleaned[name] for name in quantitative_columns)
)

print("\nKruskal-Wallis H检验结果:\n", kruskal_results_cleaned)

# Pearson correlation coefficient between overall satisfaction and each score
# column; only the coefficient is kept, the p-value is discarded
overall_scores = data_cleaned['overall_satisfaction']
pearson_results = {
    col: pearsonr(overall_scores, data_cleaned[col])[0]
    for col in quantitative_columns
}

print("\nPearson相关系数检验结果:\n", pearson_results)


# In[3]:


# Most common answers in the "系统改进建议" (improvement suggestions) column

# Strip ideographic / full-width full stops FIRST so that answers differing
# only by a trailing period are counted together. Previously the counts were
# taken before this cleaning step, so it had no effect on the chart.
data["系统改进建议"] = data["系统改进建议"].str.replace(r'[。．]', '', regex=True)
suggestions = data["系统改进建议"].value_counts()

# Answer frequencies of the "系统对工作流程的影响" (workflow impact) column
workflow_impact = data["系统对工作流程的影响"].value_counts()

# Answer frequencies of the "系统对医疗质量的影响" (medical quality impact) column
quality_impact = data["系统对医疗质量的影响"].value_counts()

# Horizontal bar chart of the ten most frequent suggestions
top_suggestions = suggestions.head(10)

plt.figure(figsize=(6, 6))
top_suggestions.plot(kind='barh', color='skyblue')
# Tick labels at size 16 on both axes
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel('出现次数', fontsize=16)
plt.ylabel('系统改进建议', fontsize=16)
plt.title('最常见的10条系统改进建议', fontsize=16)
plt.gca().invert_yaxis()  # most frequent answer at the top

# Export as PNG and TIFF
plt.savefig('最常见的10条系统改进建议.png', format='png', bbox_inches='tight')
plt.savefig('最常见的10条系统改进建议.tiff', format='tiff', bbox_inches='tight')
plt.show()


# In[10]:


# Most common answers in the "系统对工作流程的影响" (workflow impact) column

# Strip ideographic / full-width full stops FIRST so that answers differing
# only by a trailing period are counted together. Previously the counts were
# taken before this cleaning step, so it had no effect on the chart.
data["系统对工作流程的影响"] = data["系统对工作流程的影响"].str.replace(r'[。．]', '', regex=True)
suggestions = data["系统对工作流程的影响"].value_counts()

# Horizontal bar chart of the ten most frequent answers
top_suggestions = suggestions.head(10)

plt.figure(figsize=(6, 6))
top_suggestions.plot(kind='barh', color='lightcoral')
# Tick labels at size 16 on both axes
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel('出现次数', fontsize=16)
plt.ylabel('系统对工作流程的影响', fontsize=16)
plt.title('最常见的10种系统对工作流程的影响', fontsize=16)
plt.gca().invert_yaxis()  # most frequent answer at the top

# Export as PNG and TIFF
plt.savefig('系统对工作流程的影响.png', format='png', bbox_inches='tight')
plt.savefig('系统对工作流程的影响.tiff', format='tiff', bbox_inches='tight')
plt.show()


# In[4]:


# Most common answers in the "系统对医疗质量的影响" (medical quality impact) column

# Strip ideographic / full-width full stops FIRST so that answers differing
# only by a trailing period are counted together. Previously the counts were
# taken before this cleaning step, so it had no effect on the chart.
data["系统对医疗质量的影响"] = data["系统对医疗质量的影响"].str.replace(r'[。．]', '', regex=True)
suggestions = data["系统对医疗质量的影响"].value_counts()

# Horizontal bar chart of the ten most frequent answers
top_suggestions = suggestions.head(10)

plt.figure(figsize=(6, 6))
top_suggestions.plot(kind='barh', color='lightgreen')
# Tick labels at size 16 on both axes
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel('出现次数', fontsize=16)
plt.ylabel('系统对医疗质量的影响', fontsize=16)
plt.title('最常见的10种系统对医疗质量的影响', fontsize=16)
plt.gca().invert_yaxis()  # most frequent answer at the top

# Export as PNG and TIFF under this chart's own name. The previous file names
# were copied from the workflow-impact cell and overwrote that figure.
plt.savefig('系统对医疗质量的影响.png', format='png', bbox_inches='tight')
plt.savefig('系统对医疗质量的影响.tiff', format='tiff', bbox_inches='tight')
plt.show()


# In[10]:


import matplotlib.pyplot as plt
import seaborn as sns

# Use a font that supports Chinese text, e.g. "SimHei"
plt.rcParams['font.sans-serif'] = ['SimHei']
# Base font size for all figure text
plt.rcParams['font.size'] = 16

# (column to plot, y-axis label) for each boxplot, in output order.
# The column name also forms the title and the output file names.
boxplot_specs = [
    ('推荐准确性', '推荐准确性'),
    ('推荐相关性', '推荐相关性'),
    ('操作便捷性', '操作便捷性'),
    ('生成报告时间', '生成报告时间的总时间'),
    ('数据隐私保护', '数据隐私保护'),
]

for column, y_label in boxplot_specs:
    # Open a fresh 6x4 figure for EVERY plot. Previously a single figure was
    # created up front, so only the first boxplot used this size and the rest
    # were drawn on implicitly created default-size figures after plt.show().
    plt.figure(figsize=(6, 4))

    # Horizontal boxplot of the score distribution
    sns.boxplot(x=data[column], width=0.2)
    plt.xticks(fontsize=16)
    plt.title(f'{column}的分布')
    plt.ylabel(y_label)

    # Export as PNG and TIFF
    plt.savefig(f'{column}的分布.png', format='png', bbox_inches='tight')
    plt.savefig(f'{column}的分布.tiff', format='tiff', bbox_inches='tight')
    plt.show()


# In[11]:


import matplotlib.pyplot as plt
import seaborn as sns

# Use a font that supports Chinese text, e.g. "SimHei"
plt.rcParams['font.sans-serif'] = ['SimHei']
# Base font size for all figure text
plt.rcParams['font.size'] = 16

# One 6x4 figure with a single axes
fig, ax = plt.subplots(figsize=(6, 4))

# Horizontal boxplot of the "系统可接受性" score distribution
sns.boxplot(x=data['系统可接受性'], width=0.2, ax=ax)
plt.xticks(fontsize=16)
ax.set_title('系统可接受性的分布')
ax.set_ylabel('系统可接受性')

# Export as PNG and TIFF
fig.savefig('系统可接受性的分布.png', format='png', bbox_inches='tight')
fig.savefig('系统可接受性的分布.tiff', format='tiff', bbox_inches='tight')
plt.show()


# In[12]:


import matplotlib.pyplot as plt
import seaborn as sns

# Use a font that supports Chinese text, e.g. "SimHei"
plt.rcParams['font.sans-serif'] = ['SimHei']
# Base font size for all figure text
plt.rcParams['font.size'] = 16

# One 6x4 figure with a single axes
fig, ax = plt.subplots(figsize=(6, 4))

# Thin frame: 0.1 on top/bottom/right, 0.2 on the left
for side, thickness in (('top', 0.1), ('bottom', 0.1), ('left', 0.2), ('right', 0.1)):
    ax.spines[side].set_linewidth(thickness)

# Horizontal boxplot; `data` must be a DataFrame with a '系统可接受性' column
sns.boxplot(x=data['系统可接受性'], width=0.2, ax=ax)
plt.xticks(fontsize=16)
ax.set_title('系统可接受性的分布')
ax.set_ylabel('系统可接受性')

# Faint vertical guide lines; adjust the x positions as needed
for x in (0.5, 2, 3.5, 5.5, 8, 10):
    ax.axvline(x=x, color='gray', linestyle='-', linewidth=0.3)

# Export as PNG and TIFF
fig.savefig('系统可接受性的分布.png', format='png', bbox_inches='tight')
fig.savefig('系统可接受性的分布.tiff', format='tiff', bbox_inches='tight')

plt.show()


# In[ ]:
	#!/usr/bin/env python
	# coding: utf-8

	# In[1]:


	import pandas as pd
	import matplotlib.pyplot as plt

	# Load the data from the Excel file
	data = pd.read_excel("../检验推荐系统调查数据.xlsx")

	# Calculate the average values for each column
	average_values = data.mean(numeric_only=True)

	# Columns to plot
	columns = ["推荐准确性", "推荐相关性", "操作便捷性", "生成报告时间", "数据隐私保护", "系统可接受性"]

	# Values to plot
	values = average_values[columns]
	plt.rcParams['font.family'] = ['SimHei'] # 使用字体名称

	# Create a bar plot with adjusted width
	plt.figure(figsize=(3.5,4))
	plt.bar(values.index, values.values, color='skyblue', width=0.3) # 调整柱状宽度为0.6

	# Set font size for x and y ticks
	plt.xticks(rotation=45, fontsize=10) # 设置x轴标签字体大小为12
	plt.yticks(fontsize=12) # 设置y轴标签字体大小为12

	# Set font size for axis labels and title
	plt.xlabel('指标', fontsize=10) # 设置x轴标题字体大小
	plt.ylabel('平均分', fontsize=10) # 设置y轴标题字体大小
	plt.title('推荐系统各项指标的平均分', fontsize=12) # 设置图表标题字体大小

	# Set the limit for y-axis
	plt.ylim(0, 10)

	# Print each mean (rounded to 2 decimals, size 10) just above its bar.
	# The loop body is re-indented; it had been flattened to the level of `for`.
	for i, v in enumerate(values.values):
	    plt.text(i, v + 0.1, round(v, 2), ha='center', fontsize=10)

	# Save the plot as a PNG file
	plt.savefig('recommendation_system_average_scores.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('recommendation_system_average_scores.tiff', format='tiff', bbox_inches='tight')

	# Display the plot
	plt.tight_layout()
	plt.show()


	# In[3]:


	import seaborn as sns

	# Compute the correlation matrix
	correlation_matrix = data[columns].corr()

	# Set up the matplotlib figure
	plt.figure(figsize=(10, 8))

	# Generate a heatmap of the correlation matrix
	# sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", annot_kws={"size": 22})

	sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", annot_kws={"size": 22}, cbar=True, cbar_kws={'label': 'Correlation Coefficient'})



	# Title and labels
	plt.title('推荐系统各项指标的相关性分析', fontsize=20)
	plt.xticks(rotation=45, ha="right", fontsize=16)
	plt.yticks(rotation=0, ha="right", fontsize=16)
	plt.savefig('Heat map correlation analysis.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('Heat map correlation analysis.tiff', format='tiff', bbox_inches='tight')



	# Show the plot
	plt.tight_layout()
	plt.show()


	# In[ ]:





	# In[7]:


	import pandas as pd
	from scipy.stats import pearsonr

	# 1. 加载数据
	file_path = '../检验推荐系统调查数据.xlsx' # 替换为您的Excel文件路径
	data = pd.read_excel(file_path)

	# 2. 选择用于相关性分析的列
	columns = ['推荐准确性', '推荐相关性', '操作便捷性', '生成报告时间', '数据隐私保护', '系统可接受性']

	# 3. Handle missing values
	# Count missing entries per score column
	missing_values = data[columns].isnull().sum()
	# Fill any gaps with the mean of the corresponding column
	if missing_values.any():
	    data[columns] = data[columns].fillna(data[columns].mean())

	# 4. p-value of the Pearson correlation for every unordered pair of columns.
	#    Slicing from i + 1 visits each pair once (upper triangle only).
	p_values = {}
	for i, col1 in enumerate(columns):
	    for col2 in columns[i + 1:]:
	        _, p_value = pearsonr(data[col1], data[col2])
	        p_values[(col1, col2)] = p_value

	# 5. Report the p-values. Three significant digits are used instead of two
	#    fixed decimals, which printed every p < 0.005 as "0.00".
	for (col1, col2), p_value in p_values.items():
	    print(f"p值 between {col1} and {col2}: {p_value:.3g}")


	# In[6]:


	import pandas as pd
	from scipy.stats import kruskal
	from scipy.stats import pearsonr

	# 定义文件路径
	file_path = '../检验推荐系统调查数据.xlsx'

	# 加载数据
	data = pd.read_excel(file_path)

	# 定义评分指标列
	quantitative_columns = ['推荐准确性', '推荐相关性', '操作便捷性', '生成报告时间', '数据隐私保护', '系统可接受性']
	# 计算描述性统计信息
	descriptive_stats = data[quantitative_columns].describe()
	print("描述性统计信息:\n", descriptive_stats)

	# 计算相关性矩阵
	correlation_matrix = data[quantitative_columns].corr()
	print("\n相关性矩阵:\n", correlation_matrix)

	# 计算总体满意度
	data['overall_satisfaction'] = data[quantitative_columns].mean(axis=1)

	# 检查异常值并删除它们
	def find_and_remove_outliers(data, columns):
	    """Return a copy of ``data`` without rows that are IQR outliers.

	    A row is removed when, in at least one of ``columns``, its value lies
	    below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` of that column.

	    Args:
	        data: DataFrame holding the columns to inspect.
	        columns: Iterable of column names checked for outliers.

	    Returns:
	        A new DataFrame with the outlier rows dropped; ``data`` itself is
	        not modified. With no columns to check, all rows are kept.
	    """
	    # Row-aligned flag: True once a row is an outlier in any checked column.
	    # A mask is used instead of concat(...).drop_duplicates().index because
	    # drop_duplicates compares VALUES, so a second row carrying the same
	    # outlier value used to be silently kept.
	    outlier_mask = pd.Series(False, index=data.index)
	    for col in columns:
	        q25 = data[col].quantile(0.25)
	        q75 = data[col].quantile(0.75)
	        iqr = q75 - q25
	        lower_bound = q25 - 1.5 * iqr
	        upper_bound = q75 + 1.5 * iqr
	        # Plain `|` here; the escaped `\|` was a syntax error
	        outlier_mask |= (data[col] < lower_bound) | (data[col] > upper_bound)
	    return data.loc[~outlier_mask]

	data_cleaned = find_and_remove_outliers(data, quantitative_columns)

	# 使用Kruskal-Wallis H检验测试评分指标之间的差异
	kruskal_results_cleaned = kruskal(
	data_cleaned['推荐准确性'],
	data_cleaned['推荐相关性'],
	data_cleaned['操作便捷性'],
	data_cleaned['生成报告时间'],
	data_cleaned['数据隐私保护'],
	data_cleaned['系统可接受性']
	)

	print("\nKruskal-Wallis H检验结果:\n", kruskal_results_cleaned)

	# Pearson correlation coefficient between overall satisfaction and each
	# score column; only the coefficient is kept, the p-value is discarded.
	# The loop body is re-indented; it had been flattened to the level of `for`.
	pearson_results = {}
	for col in quantitative_columns:
	    pearson_results[col], _ = pearsonr(data_cleaned['overall_satisfaction'], data_cleaned[col])

	print("\nPearson相关系数检验结果:\n", pearson_results)


	# In[3]:


	# Analyzing the "系统改进建议" column to determine common suggestions

	suggestions = data["系统改进建议"].value_counts()
	# pd.set_option('display.max_colwidth', 100)
	# 使用 str.wrap() 来换行文本，这里设置每行最大字符数为 30
	# data["系统改进建议"] = data["系统改进建议"].str.wrap(40)
	data["系统改进建议"] = data["系统改进建议"].str.replace(r'[。．]', '', regex=True)

	# Analyzing the "系统对工作流程的影响" column to determine the impact on work流程
	workflow_impact = data["系统对工作流程的影响"].value_counts()

	# Analyzing the "系统对医疗质量的影响" column to determine the impact on medical quality
	quality_impact = data["系统对医疗质量的影响"].value_counts()

	# Plotting a bar chart for the top suggestions in "系统改进建议"
	top_suggestions = suggestions.head(10)

	plt.figure(figsize=(6, 6))
	top_suggestions.plot(kind='barh', color='skyblue')
	# Set font size for x and y ticks
	plt.xticks(fontsize=16) # 设置x轴标签字体大小为12
	plt.yticks(fontsize=16) # 设置y轴标签字体大小为12

	plt.xlabel('出现次数', fontsize=16)
	plt.ylabel('系统改进建议', fontsize=16)
	plt.title('最常见的10条系统改进建议', fontsize=16)
	plt.gca().invert_yaxis() # Invert the y-axis to display the most frequent suggestions at the top
	# Save the plot as a PNG file
	plt.savefig('最常见的10条系统改进建议.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('最常见的10条系统改进建议.tiff', format='tiff', bbox_inches='tight')
	plt.show()


	# In[10]:


	# Analyzing the "系统改进建议" column to determine common suggestions

	suggestions = data["系统对工作流程的影响"].value_counts()
	# pd.set_option('display.max_colwidth', 100)
	# 使用 str.wrap() 来换行文本，这里设置每行最大字符数为 30
	# data["系统改进建议"] = data["系统改进建议"].str.wrap(40)
	data["系统对工作流程的影响"] = data["系统对工作流程的影响"].str.replace(r'[。．]', '', regex=True)


	# Plotting a bar chart for the top suggestions in "系统改进建议"
	top_suggestions = suggestions.head(10)

	plt.figure(figsize=(6, 6))
	top_suggestions.plot(kind='barh', color='lightcoral')
	# Set font size for x and y ticks
	plt.xticks(fontsize=16) # 设置x轴标签字体大小为12
	plt.yticks(fontsize=16) # 设置y轴标签字体大小为12

	plt.xlabel('出现次数', fontsize=16)
	plt.ylabel('系统对工作流程的影响', fontsize=16)
	plt.title('最常见的10种系统对工作流程的影响', fontsize=16)
	plt.gca().invert_yaxis() # Invert the y-axis to display the most frequent suggestions at the top
	# Save the plot as a PNG file
	plt.savefig('系统对工作流程的影响.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('系统对工作流程的影响.tiff', format='tiff', bbox_inches='tight')
	plt.show()


	# In[4]:


	# Analyzing the "系统改进建议" column to determine common suggestions

	suggestions = data["系统对医疗质量的影响"].value_counts()
	# pd.set_option('display.max_colwidth', 100)
	# 使用 str.wrap() 来换行文本，这里设置每行最大字符数为 30
	# data["系统改进建议"] = data["系统改进建议"].str.wrap(40)
	data["系统对医疗质量的影响"] = data["系统对医疗质量的影响"].str.replace(r'[。．]', '', regex=True)


	# Plotting a bar chart for the top suggestions in "系统改进建议"
	top_suggestions = suggestions.head(10)

	plt.figure(figsize=(6, 6))
	top_suggestions.plot(kind='barh', color='lightgreen')
	# Set font size for x and y ticks
	plt.xticks(fontsize=16) # 设置x轴标签字体大小为12
	plt.yticks(fontsize=16) # 设置y轴标签字体大小为12

	plt.xlabel('出现次数', fontsize=16)
	plt.ylabel('系统对医疗质量的影响', fontsize=16)
	plt.title('最常见的10种系统对医疗质量的影响', fontsize=16)
	plt.gca().invert_yaxis() # Invert the y-axis to display the most frequent suggestions at the top
	# Save the plot as a PNG file
	plt.savefig('系统对工作流程的影响.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('系统对工作流程的影响.tiff', format='tiff', bbox_inches='tight')
	plt.show()


	# In[10]:


	import matplotlib.pyplot as plt
	import seaborn as sns

	# 设置字体为支持中文的字体，例如“SimHei”
	plt.rcParams['font.sans-serif'] = ['SimHei']
	# 为了支持负号，您还需要设置字体大小
	plt.rcParams['font.size'] = 16

	# 创建图形
	plt.figure(figsize=(6, 4))


	# Boxplots for each score category
	sns.boxplot(x=data['推荐准确性'], width=0.2)
	plt.xticks(fontsize=16)
	plt.title('推荐准确性的分布')
	plt.ylabel('推荐准确性')
	# Save the plot as a PNG file
	plt.savefig('推荐准确性的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('推荐准确性的分布.tiff', format='tiff', bbox_inches='tight')
	plt.show()



	# Boxplots for other score categories
	sns.boxplot(x=data['推荐相关性'], width=0.2)
	plt.xticks(fontsize=16)
	plt.title('推荐相关性的分布')
	plt.ylabel('推荐相关性')

	# Save the plot as a PNG file
	plt.savefig('推荐相关性的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('推荐相关性的分布.tiff', format='tiff', bbox_inches='tight')
	plt.show()




	sns.boxplot(x=data['操作便捷性'], width=0.2)
	plt.xticks(fontsize=16)
	plt.title('操作便捷性的分布')
	plt.ylabel('操作便捷性')


	# Save the plot as a PNG file
	plt.savefig('操作便捷性的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('操作便捷性的分布.tiff', format='tiff', bbox_inches='tight')
	plt.show()



	sns.boxplot(x=data['生成报告时间'], width=0.2)
	plt.xticks(fontsize=16)
	plt.title('生成报告时间的分布')
	plt.ylabel('生成报告时间的总时间')

	# Save the plot as a PNG file
	plt.savefig('生成报告时间的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('生成报告时间的分布.tiff', format='tiff', bbox_inches='tight')
	plt.show()




	sns.boxplot(x=data['数据隐私保护'], width=0.2)
	plt.xticks(fontsize=16)
	plt.title('数据隐私保护的分布')
	plt.ylabel('数据隐私保护')
	# Save the plot as a PNG file
	plt.savefig('数据隐私保护的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('数据隐私保护的分布.tiff', format='tiff', bbox_inches='tight')
	plt.show()


	# In[11]:


	import matplotlib.pyplot as plt
	import seaborn as sns

	# 设置字体为支持中文的字体，例如“SimHei”
	plt.rcParams['font.sans-serif'] = ['SimHei']
	# 为了支持负号，您还需要设置字体大小
	plt.rcParams['font.size'] = 16

	# 创建图形
	plt.figure(figsize=(6, 4))


	sns.boxplot(x=data['系统可接受性'], width=0.2)
	plt.xticks(fontsize=16)
	plt.title('系统可接受性的分布')
	plt.ylabel('系统可接受性')
	# Save the plot as a PNG file
	plt.savefig('系统可接受性的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('系统可接受性的分布.tiff', format='tiff', bbox_inches='tight')
	plt.show()


	# In[12]:


	import matplotlib.pyplot as plt
	import seaborn as sns

	# 设置字体为支持中文的字体，例如“SimHei”
	plt.rcParams['font.sans-serif'] = ['SimHei']
	# 为了支持负号，您还需要设置字体大小
	plt.rcParams['font.size'] = 16

	# 创建图形
	plt.figure(figsize=(6, 4))


	# 获取当前轴对象
	ax = plt.gca()

	# 设置边框线厚度
	ax.spines['top'].set_linewidth(0.1)
	ax.spines['bottom'].set_linewidth(0.1)
	ax.spines['left'].set_linewidth(0.2)
	ax.spines['right'].set_linewidth(0.1)


	# 假设data是一个pandas DataFrame，且包含'系统可接受性'这一列
	sns.boxplot(x=data['系统可接受性'], width=0.2,)
	plt.xticks(fontsize=16)
	plt.title('系统可接受性的分布')
	plt.ylabel('系统可接受性')

	# Faint vertical guide lines; adjust the x positions as needed.
	# The loop body is re-indented; it had been flattened to the level of `for`.
	for x in [0.5, 2, 3.5, 5.5, 8, 10]:
	    plt.axvline(x=x, color='gray', linestyle='-', linewidth=0.3)

	# Save the plot as a PNG file
	plt.savefig('系统可接受性的分布.png', format='png', bbox_inches='tight')

	# Save the plot as a TIFF file
	plt.savefig('系统可接受性的分布.tiff', format='tiff', bbox_inches='tight')

	plt.show()


	# In[ ]: