重要前提
安装AI Skills的关键前提是:必须科学上网,且开启TUN模式,这一点至关重要,直接决定安装能否顺利完成,在此郑重提醒三遍:科学上网,科学上网,科学上网。查看完整安装教程 →
npx skills add https://github.com/personamanagmentlayer/pcl --skill r-expert为 R 编程、统计分析、数据可视化和数据科学提供专业指导。
# Vectors ----
# Small atomic vectors built with c().
numbers <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie") # NOTE: shadows base::names() in this scope
# Data frames ----
# Toy employee table reused by the examples below.
df <- data.frame(
  id = 1:5,
  name = c("Alice", "Bob", "Charlie", "David", "Eve"),
  age = c(25, 30, 35, 28, 32),
  salary = c(50000, 60000, 55000, 52000, 58000)
)
# Subsetting ----
over_30 <- df$age > 30 # logical row mask
df[over_30, ]          # rows where age > 30
df[, c("name", "age")] # keep only the name and age columns
# Functions ----
# Arithmetic mean of a numeric vector.
# x: numeric vector; na.rm: drop NA values before averaging. Default FALSE
# preserves the original behavior (any NA makes the result NA).
calculate_mean <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
# Apply family ----
# FIX: sapply() over a numeric vector is an anti-pattern — `*` is already
# vectorized and returns the identical numeric vector.
df$age * 2
lapply(list(1:5, 6:10), sum) # list in, list of per-element sums out
# Control structures ----
# `if` requires a scalar condition; mean() returns a single value, so this
# is safe as written.
if (mean(df$age) > 30) {
  print("Average age is above 30")
} else {
  print("Average age is 30 or below")
}
# Loops ----
# FIX: 1:nrow(df) yields c(1, 0) when the frame is empty; seq_len() handles
# the zero-row case correctly.
for (i in seq_len(nrow(df))) {
  print(df$name[i])
}
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
library(dplyr)
library(tidyr)
library(stringr)
# dplyr operations
# Filter, derive columns, then aggregate by age band. The result is only
# printed, never assigned: df itself is NOT modified and gains no
# age_group column from this chain.
df %>%
filter(age > 28) %>%
select(name, age, salary) %>%
mutate(
salary_bonus = salary * 1.1,
age_group = case_when(
age < 30 ~ "Young",
age < 35 ~ "Mid-career",
TRUE ~ "Senior"
)
) %>%
arrange(desc(salary)) %>%
group_by(age_group) %>%
summarise(
count = n(),
avg_salary = mean(salary),
total_salary = sum(salary)
)
# Reshaping data
# One row per id, one column per year.
wide_data <- data.frame(
id = 1:3,
year_2021 = c(100, 200, 150),
year_2022 = c(120, 210, 160)
)
# Wide to long
# names_prefix strips the leading "year_", so the year column holds the
# character strings "2021" / "2022".
long_data <- wide_data %>%
pivot_longer(
cols = starts_with("year"),
names_to = "year",
values_to = "value",
names_prefix = "year_"
)
# Long to wide
# Here names_prefix is prepended to the new column names, restoring
# year_2021 / year_2022 — a round trip back to the wide shape.
wide_again <- long_data %>%
pivot_wider(
names_from = year,
values_from = value,
names_prefix = "year_"
)
# String operations
# Derived string columns; again only printed, df is unchanged.
df %>%
mutate(
name_upper = str_to_upper(name),
name_length = str_length(name),
first_letter = str_sub(name, 1, 1)
)
# Joining data
df1 <- data.frame(id = 1:3, value1 = c("A", "B", "C"))
df2 <- data.frame(id = 2:4, value2 = c("X", "Y", "Z"))
inner_join(df1, df2, by = "id") # ids 2, 3 only (present in both)
left_join(df1, df2, by = "id") # all of df1; value2 is NA for id 1
full_join(df1, df2, by = "id") # ids 1-4; NA where either side is missing
library(ggplot2)
# Basic scatter plot
# Points plus a linear-model trend line with its confidence band.
ggplot(df, aes(x = age, y = salary)) +
geom_point(size = 3, color = "blue") +
geom_smooth(method = "lm", se = TRUE) +
labs(
title = "Age vs Salary",
x = "Age (years)",
y = "Salary ($)"
) +
theme_minimal()
# Bar plot with facets
# NOTE(review): df as defined earlier has no age_group column (the dplyr
# chain that derived it was never assigned back) — confirm df is
# augmented before running this and the two plots below.
ggplot(df, aes(x = name, y = salary, fill = age_group)) +
geom_col() +
facet_wrap(~ age_group) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Box plot
# Boxes per group with the raw points jittered on top.
ggplot(df, aes(x = age_group, y = salary)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.2, alpha = 0.5)
# Histogram with density overlay.
# FIX: `..density..` was deprecated in ggplot2 3.4.0 — use after_stat(density);
# the `size` aesthetic for lines was likewise replaced by `linewidth`.
ggplot(df, aes(x = salary)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "steelblue") +
  geom_density(color = "red", linewidth = 1)
# Time series
# NOTE(review): time_series_df is not defined anywhere in this file — it
# must carry a Date-class `date` column (required by scale_x_date) and a
# numeric `value` column.
ggplot(time_series_df, aes(x = date, y = value)) +
geom_line(color = "darkgreen") +
geom_point() +
scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Descriptive statistics
summary(df)
mean(df$age)
median(df$salary)
sd(df$age)
var(df$salary)
quantile(df$age, probs = c(0.25, 0.5, 0.75)) # quartiles
# Correlation
cor(df$age, df$salary) # Pearson coefficient only
cor.test(df$age, df$salary) # coefficient plus p-value and CI
# T-test
# NOTE(review): df as defined earlier has no gender column — confirm the
# data actually carries one before running this two-sample test.
t.test(df$salary ~ df$gender)
# ANOVA
# NOTE(review): likewise assumes an age_group column exists on df.
model <- aov(salary ~ age_group, data = df)
summary(model)
TukeyHSD(model) # pairwise group comparisons
# Linear regression
# NOTE(review): assumes an `experience` column not defined in this file.
lm_model <- lm(salary ~ age + experience, data = df)
summary(lm_model)
# Predictions
new_data <- data.frame(age = c(30, 35), experience = c(5, 8))
predict(lm_model, new_data, interval = "confidence")
# Multiple regression
# NOTE(review): assumes an `education` column not defined in this file.
multi_model <- lm(salary ~ age + experience + education, data = df)
summary(multi_model)
# Check assumptions ----
# Standard lm diagnostics: residuals vs fitted, Q-Q, scale-location, leverage.
# FIX: par() mutates global graphics state — capture the previous settings
# and restore them so later plots are not stuck in a 2x2 grid.
old_par <- par(mfrow = c(2, 2))
plot(multi_model)
par(old_par)
# Logistic regression
# NOTE(review): assumes a binary `outcome` column not defined in this file.
logit_model <- glm(outcome ~ age + salary,
data = df,
family = binomial(link = "logit"))
summary(logit_model)
library(forecast)
# Create time series
# NOTE(review): `data` is undefined in this file (the name also collides
# with base::data) — supply a numeric vector of monthly observations.
ts_data <- ts(data, start = c(2020, 1), frequency = 12)
# Decomposition
# Splits the series into trend, seasonal and remainder components.
decomposed <- decompose(ts_data)
plot(decomposed)
# ARIMA model
# auto.arima() searches candidate (p, d, q) orders automatically.
auto_arima <- auto.arima(ts_data)
summary(auto_arima)
# Forecasting
forecast_result <- forecast(auto_arima, h = 12) # 12 periods ahead
plot(forecast_result)
# Accuracy metrics
accuracy(forecast_result) # RMSE, MAE, MAPE, ... on the fitted series
library(caret)
library(randomForest)
# Split data
set.seed(123) # make the 80/20 partition reproducible
train_index <- createDataPartition(df$outcome, p = 0.8, list = FALSE)
train_data <- df[train_index, ]
test_data <- df[-train_index, ]
# Train model
# NOTE(review): outcome must be a factor for randomForest to perform
# classification rather than regression — confirm upstream.
rf_model <- randomForest(
outcome ~ .,
data = train_data,
ntree = 500,
importance = TRUE
)
# Predictions
predictions <- predict(rf_model, test_data)
# Confusion matrix
confusionMatrix(predictions, test_data$outcome)
# Feature importance
importance(rf_model)
varImpPlot(rf_model)
# Cross-validation
# 10-fold CV; savePredictions keeps the held-out predictions.
train_control <- trainControl(
method = "cv",
number = 10,
savePredictions = TRUE
)
cv_model <- train(
outcome ~ .,
data = train_data,
method = "rf",
trControl = train_control
)
print(cv_model)
---
title: "Analysis Report"
author: "Data Scientist"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
---
## Introduction
This analysis explores the relationship between variables.
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
df <- read.csv("data.csv")
head(df)
ggplot(df, aes(x = x, y = y)) +
geom_point() +
theme_minimal()
分析显示相关性为 `r cor(df$x, df$y)`。
## 数据导入/导出
```r
# CSV
# NOTE(review): all paths below are hardcoded relative paths — replace
# with real locations (or file.path()/here-style paths) before use.
df <- read.csv("data.csv")
write.csv(df, "output.csv", row.names = FALSE) # drop the row-name column
# Excel
library(readxl)
library(writexl)
df <- read_excel("data.xlsx", sheet = "Sheet1")
write_xlsx(df, "output.xlsx")
# JSON
library(jsonlite)
df <- fromJSON("data.json")
write_json(df, "output.json")
# Database
library(DBI)
library(RSQLite)
con <- dbConnect(SQLite(), "database.db")
df <- dbReadTable(con, "table_name")
dbWriteTable(con, "new_table", df)
dbDisconnect(con) # always release the connection when done
# Web APIs
library(httr)
response <- GET("https://api.example.com/data")
data <- content(response, as = "parsed") # parse body per Content-Type header
❌ 在循环中增长向量 ❌ 不设置随机种子 ❌ 忽略 NA 值 ❌ 使用 attach() ❌ 不记录代码 ❌ 硬编码文件路径 ❌ 不检查假设
每周安装数
70
代码仓库
GitHub 星标数
11
首次出现
2026年1月24日
安全审计
安装于
opencode61
codex59
gemini-cli56
github-copilot52
cursor52
claude-code47
Expert guidance for R programming, statistical analysis, data visualization, and data science.
# Vectors ----
# Small atomic vectors built with c().
numbers <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie") # NOTE: shadows base::names() in this scope
# Data frames ----
# Toy employee table reused by the examples below.
df <- data.frame(
  id = 1:5,
  name = c("Alice", "Bob", "Charlie", "David", "Eve"),
  age = c(25, 30, 35, 28, 32),
  salary = c(50000, 60000, 55000, 52000, 58000)
)
# Subsetting ----
over_30 <- df$age > 30 # logical row mask
df[over_30, ]          # rows where age > 30
df[, c("name", "age")] # keep only the name and age columns
# Functions ----
# Arithmetic mean of a numeric vector.
# x: numeric vector; na.rm: drop NA values before averaging. Default FALSE
# preserves the original behavior (any NA makes the result NA).
calculate_mean <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
# Apply family ----
# FIX: sapply() over a numeric vector is an anti-pattern — `*` is already
# vectorized and returns the identical numeric vector.
df$age * 2
lapply(list(1:5, 6:10), sum) # list in, list of per-element sums out
# Control structures ----
# `if` requires a scalar condition; mean() returns a single value, so this
# is safe as written.
if (mean(df$age) > 30) {
  print("Average age is above 30")
} else {
  print("Average age is 30 or below")
}
# Loops ----
# FIX: 1:nrow(df) yields c(1, 0) when the frame is empty; seq_len() handles
# the zero-row case correctly.
for (i in seq_len(nrow(df))) {
  print(df$name[i])
}
library(dplyr)
library(tidyr)
library(stringr)
# dplyr operations
# Filter, derive columns, then aggregate by age band. The result is only
# printed, never assigned: df itself is NOT modified and gains no
# age_group column from this chain.
df %>%
filter(age > 28) %>%
select(name, age, salary) %>%
mutate(
salary_bonus = salary * 1.1,
age_group = case_when(
age < 30 ~ "Young",
age < 35 ~ "Mid-career",
TRUE ~ "Senior"
)
) %>%
arrange(desc(salary)) %>%
group_by(age_group) %>%
summarise(
count = n(),
avg_salary = mean(salary),
total_salary = sum(salary)
)
# Reshaping data
# One row per id, one column per year.
wide_data <- data.frame(
id = 1:3,
year_2021 = c(100, 200, 150),
year_2022 = c(120, 210, 160)
)
# Wide to long
# names_prefix strips the leading "year_", so the year column holds the
# character strings "2021" / "2022".
long_data <- wide_data %>%
pivot_longer(
cols = starts_with("year"),
names_to = "year",
values_to = "value",
names_prefix = "year_"
)
# Long to wide
# Here names_prefix is prepended to the new column names, restoring
# year_2021 / year_2022 — a round trip back to the wide shape.
wide_again <- long_data %>%
pivot_wider(
names_from = year,
values_from = value,
names_prefix = "year_"
)
# String operations
# Derived string columns; again only printed, df is unchanged.
df %>%
mutate(
name_upper = str_to_upper(name),
name_length = str_length(name),
first_letter = str_sub(name, 1, 1)
)
# Joining data
df1 <- data.frame(id = 1:3, value1 = c("A", "B", "C"))
df2 <- data.frame(id = 2:4, value2 = c("X", "Y", "Z"))
inner_join(df1, df2, by = "id") # ids 2, 3 only (present in both)
left_join(df1, df2, by = "id") # all of df1; value2 is NA for id 1
full_join(df1, df2, by = "id") # ids 1-4; NA where either side is missing
library(ggplot2)
# Basic scatter plot
# Points plus a linear-model trend line with its confidence band.
ggplot(df, aes(x = age, y = salary)) +
geom_point(size = 3, color = "blue") +
geom_smooth(method = "lm", se = TRUE) +
labs(
title = "Age vs Salary",
x = "Age (years)",
y = "Salary ($)"
) +
theme_minimal()
# Bar plot with facets
# NOTE(review): df as defined earlier has no age_group column (the dplyr
# chain that derived it was never assigned back) — confirm df is
# augmented before running this and the two plots below.
ggplot(df, aes(x = name, y = salary, fill = age_group)) +
geom_col() +
facet_wrap(~ age_group) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Box plot
# Boxes per group with the raw points jittered on top.
ggplot(df, aes(x = age_group, y = salary)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.2, alpha = 0.5)
# Histogram with density overlay.
# FIX: `..density..` was deprecated in ggplot2 3.4.0 — use after_stat(density);
# the `size` aesthetic for lines was likewise replaced by `linewidth`.
ggplot(df, aes(x = salary)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "steelblue") +
  geom_density(color = "red", linewidth = 1)
# Time series
# NOTE(review): time_series_df is not defined anywhere in this file — it
# must carry a Date-class `date` column (required by scale_x_date) and a
# numeric `value` column.
ggplot(time_series_df, aes(x = date, y = value)) +
geom_line(color = "darkgreen") +
geom_point() +
scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Descriptive statistics
summary(df)
mean(df$age)
median(df$salary)
sd(df$age)
var(df$salary)
quantile(df$age, probs = c(0.25, 0.5, 0.75)) # quartiles
# Correlation
cor(df$age, df$salary) # Pearson coefficient only
cor.test(df$age, df$salary) # coefficient plus p-value and CI
# T-test
# NOTE(review): df as defined earlier has no gender column — confirm the
# data actually carries one before running this two-sample test.
t.test(df$salary ~ df$gender)
# ANOVA
# NOTE(review): likewise assumes an age_group column exists on df.
model <- aov(salary ~ age_group, data = df)
summary(model)
TukeyHSD(model) # pairwise group comparisons
# Linear regression
# NOTE(review): assumes an `experience` column not defined in this file.
lm_model <- lm(salary ~ age + experience, data = df)
summary(lm_model)
# Predictions
new_data <- data.frame(age = c(30, 35), experience = c(5, 8))
predict(lm_model, new_data, interval = "confidence")
# Multiple regression
# NOTE(review): assumes an `education` column not defined in this file.
multi_model <- lm(salary ~ age + experience + education, data = df)
summary(multi_model)
# Check assumptions ----
# Standard lm diagnostics: residuals vs fitted, Q-Q, scale-location, leverage.
# FIX: par() mutates global graphics state — capture the previous settings
# and restore them so later plots are not stuck in a 2x2 grid.
old_par <- par(mfrow = c(2, 2))
plot(multi_model)
par(old_par)
# Logistic regression
# NOTE(review): assumes a binary `outcome` column not defined in this file.
logit_model <- glm(outcome ~ age + salary,
data = df,
family = binomial(link = "logit"))
summary(logit_model)
library(forecast)
# Create time series
# NOTE(review): `data` is undefined in this file (the name also collides
# with base::data) — supply a numeric vector of monthly observations.
ts_data <- ts(data, start = c(2020, 1), frequency = 12)
# Decomposition
# Splits the series into trend, seasonal and remainder components.
decomposed <- decompose(ts_data)
plot(decomposed)
# ARIMA model
# auto.arima() searches candidate (p, d, q) orders automatically.
auto_arima <- auto.arima(ts_data)
summary(auto_arima)
# Forecasting
forecast_result <- forecast(auto_arima, h = 12) # 12 periods ahead
plot(forecast_result)
# Accuracy metrics
accuracy(forecast_result) # RMSE, MAE, MAPE, ... on the fitted series
library(caret)
library(randomForest)
# Split data
set.seed(123) # make the 80/20 partition reproducible
train_index <- createDataPartition(df$outcome, p = 0.8, list = FALSE)
train_data <- df[train_index, ]
test_data <- df[-train_index, ]
# Train model
# NOTE(review): outcome must be a factor for randomForest to perform
# classification rather than regression — confirm upstream.
rf_model <- randomForest(
outcome ~ .,
data = train_data,
ntree = 500,
importance = TRUE
)
# Predictions
predictions <- predict(rf_model, test_data)
# Confusion matrix
confusionMatrix(predictions, test_data$outcome)
# Feature importance
importance(rf_model)
varImpPlot(rf_model)
# Cross-validation
# 10-fold CV; savePredictions keeps the held-out predictions.
train_control <- trainControl(
method = "cv",
number = 10,
savePredictions = TRUE
)
cv_model <- train(
outcome ~ .,
data = train_data,
method = "rf",
trControl = train_control
)
print(cv_model)
---
title: "Analysis Report"
author: "Data Scientist"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
---
## Introduction
This analysis explores the relationship between variables.
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
df <- read.csv("data.csv")
head(df)
ggplot(df, aes(x = x, y = y)) +
geom_point() +
theme_minimal()
The analysis shows a correlation of `r cor(df$x, df$y)`.
## Data Import/Export
```r
# CSV
# NOTE(review): all paths below are hardcoded relative paths — replace
# with real locations (or file.path()/here-style paths) before use.
df <- read.csv("data.csv")
write.csv(df, "output.csv", row.names = FALSE) # drop the row-name column
# Excel
library(readxl)
library(writexl)
df <- read_excel("data.xlsx", sheet = "Sheet1")
write_xlsx(df, "output.xlsx")
# JSON
library(jsonlite)
df <- fromJSON("data.json")
write_json(df, "output.json")
# Database
library(DBI)
library(RSQLite)
con <- dbConnect(SQLite(), "database.db")
df <- dbReadTable(con, "table_name")
dbWriteTable(con, "new_table", df)
dbDisconnect(con) # always release the connection when done
# Web APIs
library(httr)
response <- GET("https://api.example.com/data")
data <- content(response, as = "parsed") # parse body per Content-Type header
❌ Growing vectors in loops ❌ Not setting random seed ❌ Ignoring NA values ❌ Using attach() ❌ Not documenting code ❌ Hardcoding file paths ❌ Not checking assumptions
Weekly Installs
70
Repository
GitHub Stars
11
First Seen
Jan 24, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Warn · Snyk: Pass
Installed on
opencode61
codex59
gemini-cli56
github-copilot52
cursor52
claude-code47
前端代码审计工具 - 自动化检测可访问性、性能、响应式设计、主题化与反模式
49,600 周安装
Azure 工作负载自动升级评估工具 - 支持 Functions、App Service 计划与 SKU 迁移
303 周安装
Tauri 跨平台桌面应用开发指南:TypeScript + Rust 全栈实战教程
306 周安装
Fastify OAuth 2.0/2.1 实现与调试指南:授权码+PKCE流程详解
301 周安装
FastAPI专家技能:高性能异步API开发、微服务架构与生产部署指南
306 周安装
移动端响应式设计指南:CSS断点、Tailwind与触摸交互实现移动优先Web应用
302 周安装
GitHub 问题自动创建与优化工具 - 支持 Bug、功能、用户故事和任务
336 周安装