Excel Analysis by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill 'Excel Analysis'
使用 pandas 读取 Excel 文件:
import pandas as pd
# Load the "Sheet1" worksheet of data.xlsx into a DataFrame
df = pd.read_excel("data.xlsx", sheet_name="Sheet1")

# Preview the first rows of the sheet
preview = df.head()
print(preview)

# Summary statistics for the loaded data
summary = df.describe()
print(summary)
处理工作簿中的所有工作表:
import pandas as pd
# Open the workbook once; the context manager closes the underlying file
# handle even if reading one of the sheets fails
with pd.ExcelFile("workbook.xlsx") as excel_file:
    for sheet_name in excel_file.sheet_names:
        # Parse each worksheet from the already-open workbook
        df = pd.read_excel(excel_file, sheet_name=sheet_name)
        print(f"\n{sheet_name}:")
        print(df.head())
执行常见的分析任务:
import pandas as pd
# Load the sales records
df = pd.read_excel("sales.xlsx")

# Total sales per region
sales_by_region = df.groupby("region")["sales"].sum()
print(sales_by_region)

# Keep only the rows whose sales exceed 10000
is_high = df["sales"] > 10000
high_sales = df[is_high]

# Profit margin = (revenue - cost) / revenue, stored as a new column
gross_profit = df["revenue"] - df["cost"]
df["profit_margin"] = gross_profit / df["revenue"]

# Rows ordered by sales, largest first
df_sorted = df.sort_values(by="sales", ascending=False)
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
将数据写入 Excel 并设置格式:
import pandas as pd
df = pd.DataFrame({
    "Product": ["A", "B", "C"],
    "Sales": [100, 200, 150],
    "Profit": [20, 40, 30],
})

# The context manager saves and closes the workbook on exit, and still
# releases the file handle if formatting raises part-way through
with pd.ExcelWriter("output.xlsx", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="Sales", index=False)

    # openpyxl worksheet that pandas just wrote, used for formatting
    worksheet = writer.sheets["Sales"]

    # Auto-adjust column widths: longest cell text in the column plus padding.
    # Empty cells are skipped so they are not measured as the text "None".
    for column in worksheet.columns:
        column_letter = column[0].column_letter
        text_lengths = (
            len(str(cell.value)) for cell in column if cell.value is not None
        )
        max_length = max(text_lengths, default=0)
        worksheet.column_dimensions[column_letter].width = max_length + 2
以编程方式创建数据透视表:
import pandas as pd
df = pd.read_excel("sales_data.xlsx")

# Sum of sales for each region (rows) and product (columns);
# combinations with no data are filled with 0
pivot = df.pivot_table(
    values="sales",
    index="region",
    columns="product",
    aggfunc="sum",
    fill_value=0,
)
print(pivot)

# Write the pivot table to its own workbook
pivot.to_excel("pivot_report.xlsx")
根据 Excel 数据生成图表:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_excel("data.xlsx")

# Bar chart on its own figure and axes
bar_fig, bar_ax = plt.subplots()
df.plot(x="category", y="value", kind="bar", ax=bar_ax)
bar_ax.set_title("按类别划分的销售额")
bar_ax.set_xlabel("类别")
bar_ax.set_ylabel("销售额")
bar_fig.tight_layout()
bar_fig.savefig("chart.png")
# Close the figure so it is not kept open for the rest of the script
plt.close(bar_fig)

# Pie chart on a separate figure: Series.plot reuses the current axes when
# a figure is already open, so without explicit axes the pie would be drawn
# on top of the bar chart
pie_fig, pie_ax = plt.subplots()
shares = df.set_index("category")["value"]
shares.plot(kind="pie", autopct="%1.1f%%", ax=pie_ax)
pie_ax.set_title("市场份额")
# Blank out the y-label, which would otherwise show the column name
pie_ax.set_ylabel("")
pie_fig.savefig("pie_chart.png")
plt.close(pie_fig)
清洗和准备 Excel 数据:
import pandas as pd
df = pd.read_excel("messy_data.xlsx")

# Trim surrounding whitespace first, so rows that differ only by stray
# spaces are recognised as duplicates below
df["name"] = df["name"].str.strip()

# Convert data types before filling gaps: filling with 0 first would hand
# pd.to_datetime a 0 instead of a missing value (0 parses as 1970-01-01)
df["date"] = pd.to_datetime(df["date"])
df["amount"] = pd.to_numeric(df["amount"], errors="coerce")

# Remove duplicate rows
df = df.drop_duplicates()

# Fill missing values in numeric columns only, after conversion, so values
# coerced to NaN above are covered too (or use df.dropna() to drop them)
numeric_columns = df.select_dtypes(include="number").columns
df[numeric_columns] = df[numeric_columns].fillna(0)

# Save cleaned data
df.to_excel("cleaned_data.xlsx", index=False)
合并多个 Excel 文件:
import pandas as pd
# Load the two quarterly sales workbooks
df1 = pd.read_excel("sales_q1.xlsx")
df2 = pd.read_excel("sales_q2.xlsx")

# Stack them row-wise and renumber the index from 0
quarterly_frames = [df1, df2]
combined = pd.concat(quarterly_frames, ignore_index=True)

# Left-join customer details onto sales by customer_id: every sales row is
# kept, and rows without a matching customer get missing values
customers = pd.read_excel("customers.xlsx")
sales = pd.read_excel("sales.xlsx")
merged = sales.merge(customers, on="customer_id", how="left")
merged.to_excel("merged_data.xlsx", index=False)
应用条件格式和样式:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Font
# Build a small workbook to format
df = pd.DataFrame({
    "Product": ["A", "B", "C"],
    "Sales": [100, 200, 150],
})
df.to_excel("formatted.xlsx", index=False)

# Re-open the file with openpyxl to style individual cells
wb = load_workbook("formatted.xlsx")
ws = wb.active

# Static fills chosen from each cell's value (not an Excel conditional-format
# rule): Sales below 150 are red, everything else is green
red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
green_fill = PatternFill(start_color="00FF00", end_color="00FF00", fill_type="solid")

# Column B holds Sales; row 1 is the header, so data spans rows 2..len(df)+1
last_row = len(df) + 1
for (cell,) in ws.iter_rows(min_row=2, max_row=last_row, min_col=2, max_col=2):
    cell.fill = red_fill if cell.value < 150 else green_fill

# Bold the header row
for header_cell in ws[1]:
    header_cell.font = Font(bold=True)

wb.save("formatted.xlsx")
- 使用 read_excel 的 usecols 参数仅读取需要的列
- 处理超大文件时,用 skiprows 和 nrows 参数分批读取(read_excel 不支持 chunksize 参数)
- 根据文件类型选择 engine='openpyxl'(.xlsx)或 engine='xlrd'(.xls)
- 使用 dtype 参数指定列类型以加快读取速度
每周安装次数
0
代码仓库
GitHub 星标数
22.6K
首次出现时间
1970年1月1日
安全审计
Read Excel files with pandas:
import pandas as pd
# Load the "Sheet1" worksheet of data.xlsx into a DataFrame
df = pd.read_excel("data.xlsx", sheet_name="Sheet1")

# Preview the first rows of the sheet
preview = df.head()
print(preview)

# Summary statistics for the loaded data
summary = df.describe()
print(summary)
Process all sheets in a workbook:
import pandas as pd
# Open the workbook once; the context manager closes the underlying file
# handle even if reading one of the sheets fails
with pd.ExcelFile("workbook.xlsx") as excel_file:
    for sheet_name in excel_file.sheet_names:
        # Parse each worksheet from the already-open workbook
        df = pd.read_excel(excel_file, sheet_name=sheet_name)
        print(f"\n{sheet_name}:")
        print(df.head())
Perform common analysis tasks:
import pandas as pd
# Load the sales records
df = pd.read_excel("sales.xlsx")

# Total sales per region
sales_by_region = df.groupby("region")["sales"].sum()
print(sales_by_region)

# Keep only the rows whose sales exceed 10000
is_high = df["sales"] > 10000
high_sales = df[is_high]

# Profit margin = (revenue - cost) / revenue, stored as a new column
gross_profit = df["revenue"] - df["cost"]
df["profit_margin"] = gross_profit / df["revenue"]

# Rows ordered by sales, largest first
df_sorted = df.sort_values(by="sales", ascending=False)
Write data to Excel with formatting:
import pandas as pd
df = pd.DataFrame({
    "Product": ["A", "B", "C"],
    "Sales": [100, 200, 150],
    "Profit": [20, 40, 30],
})

# The context manager saves and closes the workbook on exit, and still
# releases the file handle if formatting raises part-way through
with pd.ExcelWriter("output.xlsx", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="Sales", index=False)

    # openpyxl worksheet that pandas just wrote, used for formatting
    worksheet = writer.sheets["Sales"]

    # Auto-adjust column widths: longest cell text in the column plus padding.
    # Empty cells are skipped so they are not measured as the text "None".
    for column in worksheet.columns:
        column_letter = column[0].column_letter
        text_lengths = (
            len(str(cell.value)) for cell in column if cell.value is not None
        )
        max_length = max(text_lengths, default=0)
        worksheet.column_dimensions[column_letter].width = max_length + 2
Create pivot tables programmatically:
import pandas as pd
df = pd.read_excel("sales_data.xlsx")

# Sum of sales for each region (rows) and product (columns);
# combinations with no data are filled with 0
pivot = df.pivot_table(
    values="sales",
    index="region",
    columns="product",
    aggfunc="sum",
    fill_value=0,
)
print(pivot)

# Write the pivot table to its own workbook
pivot.to_excel("pivot_report.xlsx")
Generate charts from Excel data:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_excel("data.xlsx")

# Bar chart on its own figure and axes
bar_fig, bar_ax = plt.subplots()
df.plot(x="category", y="value", kind="bar", ax=bar_ax)
bar_ax.set_title("Sales by Category")
bar_ax.set_xlabel("Category")
bar_ax.set_ylabel("Sales")
bar_fig.tight_layout()
bar_fig.savefig("chart.png")
# Close the figure so it is not kept open for the rest of the script
plt.close(bar_fig)

# Pie chart on a separate figure: Series.plot reuses the current axes when
# a figure is already open, so without explicit axes the pie would be drawn
# on top of the bar chart
pie_fig, pie_ax = plt.subplots()
shares = df.set_index("category")["value"]
shares.plot(kind="pie", autopct="%1.1f%%", ax=pie_ax)
pie_ax.set_title("Market Share")
# Blank out the y-label, which would otherwise show the column name
pie_ax.set_ylabel("")
pie_fig.savefig("pie_chart.png")
plt.close(pie_fig)
Clean and prepare Excel data:
import pandas as pd
df = pd.read_excel("messy_data.xlsx")

# Trim surrounding whitespace first, so rows that differ only by stray
# spaces are recognised as duplicates below
df["name"] = df["name"].str.strip()

# Convert data types before filling gaps: filling with 0 first would hand
# pd.to_datetime a 0 instead of a missing value (0 parses as 1970-01-01)
df["date"] = pd.to_datetime(df["date"])
df["amount"] = pd.to_numeric(df["amount"], errors="coerce")

# Remove duplicate rows
df = df.drop_duplicates()

# Fill missing values in numeric columns only, after conversion, so values
# coerced to NaN above are covered too (or use df.dropna() to drop them)
numeric_columns = df.select_dtypes(include="number").columns
df[numeric_columns] = df[numeric_columns].fillna(0)

# Save cleaned data
df.to_excel("cleaned_data.xlsx", index=False)
Combine multiple Excel files:
import pandas as pd
# Load the two quarterly sales workbooks
df1 = pd.read_excel("sales_q1.xlsx")
df2 = pd.read_excel("sales_q2.xlsx")

# Stack them row-wise and renumber the index from 0
quarterly_frames = [df1, df2]
combined = pd.concat(quarterly_frames, ignore_index=True)

# Left-join customer details onto sales by customer_id: every sales row is
# kept, and rows without a matching customer get missing values
customers = pd.read_excel("customers.xlsx")
sales = pd.read_excel("sales.xlsx")
merged = sales.merge(customers, on="customer_id", how="left")
merged.to_excel("merged_data.xlsx", index=False)
Apply conditional formatting and styles:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Font
# Build a small workbook to format
df = pd.DataFrame({
    "Product": ["A", "B", "C"],
    "Sales": [100, 200, 150],
})
df.to_excel("formatted.xlsx", index=False)

# Re-open the file with openpyxl to style individual cells
wb = load_workbook("formatted.xlsx")
ws = wb.active

# Static fills chosen from each cell's value (not an Excel conditional-format
# rule): Sales below 150 are red, everything else is green
red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
green_fill = PatternFill(start_color="00FF00", end_color="00FF00", fill_type="solid")

# Column B holds Sales; row 1 is the header, so data spans rows 2..len(df)+1
last_row = len(df) + 1
for (cell,) in ws.iter_rows(min_row=2, max_row=last_row, min_col=2, max_col=2):
    cell.fill = red_fill if cell.value < 150 else green_fill

# Bold the header row
for header_cell in ws[1]:
    header_cell.font = Font(bold=True)

wb.save("formatted.xlsx")
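- Use read_excel with usecols to read specific columns only
- For very large files, read in batches with skiprows and nrows (read_excel does not support chunksize)
- Use engine='openpyxl' (.xlsx) or engine='xlrd' (.xls) based on file type
- Use the dtype parameter to specify column types for faster reading
Weekly Installs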
0
Repository
GitHub Stars
22.6K
First Seen
Jan 1, 1970
Security Audits
DOCX文件创建、编辑与分析完整指南 - 使用docx-js、Pandoc和Python脚本
46,400 周安装