重要前提
安装AI Skills的关键前提是:必须科学上网,且开启TUN模式,这一点至关重要,直接决定安装能否顺利完成,在此郑重提醒三遍:科学上网,科学上网,科学上网。查看完整安装教程 →
biological-expert by personamanagmentlayer/pcl
npx skills add https://github.com/personamanagmentlayer/pcl --skill biological-expert
为生物学、生物技术、遗传学、生物信息学和计算生物学应用提供专家指导。
from Bio import SeqIO, Seq
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction, molecular_weight
from typing import Dict, List
class DNAAnalyzer:
    """Analyze a single DNA sequence with Biopython.

    The input string is upper-cased and wrapped in a ``Seq`` object; every
    method reads that object from ``self.sequence``.
    """

    def __init__(self, sequence: str):
        """Store ``sequence`` upper-cased as a ``Seq``."""
        self.sequence = Seq(sequence.upper())

    def basic_stats(self) -> Dict:
        """Return length, GC percentage, molecular weight and base counts.

        Returns:
            A dict with the keys ``length``, ``gc_content`` (the
            ``gc_fraction`` result multiplied by 100), ``molecular_weight``
            (computed by ``molecular_weight`` with sequence type ``"DNA"``)
            and ``nucleotide_counts``.
        """
        return {
            "length": len(self.sequence),
            "gc_content": gc_fraction(self.sequence) * 100,
            "molecular_weight": molecular_weight(self.sequence, "DNA"),
            "nucleotide_counts": self._count_nucleotides(),
        }

    def _count_nucleotides(self) -> Dict[str, int]:
        """Count occurrences of A, T, G and C (other symbols are ignored)."""
        return {base: self.sequence.count(base) for base in "ATGC"}

    def transcribe(self) -> str:
        """Return the RNA transcript of the sequence as a string."""
        return str(self.sequence.transcribe())

    def translate(self, table: int = 1) -> str:
        """Return the protein translation of the sequence as a string.

        Args:
            table: Codon table identifier forwarded to ``Seq.translate``.
        """
        return str(self.sequence.translate(table=table))

    def reverse_complement(self) -> str:
        """Return the reverse complement of the sequence as a string."""
        return str(self.sequence.reverse_complement())

    def find_orfs(self, min_length: int = 100) -> List[Dict]:
        """Find open reading frames in all six reading frames.

        Every ``M`` in a translated frame is treated as a start and is paired
        with the first ``*`` after it, so ORFs sharing a stop codon are all
        reported.

        Args:
            min_length: Minimum ORF length in nucleotides (stop codon
                excluded) for an ORF to be reported.

        Returns:
            One dict per ORF with the keys ``strand`` (+1 or -1), ``frame``
            (0-2), ``start``, ``end``, ``length`` and ``protein``. ``start``
            and ``end`` are nucleotide offsets into the scanned strand; for
            strand -1 that is the reverse complement, not the original
            sequence. ``end`` is the offset of the first base of the stop
            codon.
        """
        orfs: List[Dict] = []
        strands = [(+1, self.sequence), (-1, self.sequence.reverse_complement())]
        for strand, seq in strands:
            for frame in range(3):
                # Only translate whole codons: a dangling 1-2 base tail adds
                # nothing to the protein and makes Biopython emit a
                # partial-codon warning.
                usable = (len(seq) - frame) // 3 * 3
                if usable <= 0:
                    continue
                protein = str(seq[frame:frame + usable].translate(to_stop=False))
                orfs.extend(
                    self._orfs_in_translation(protein, strand, frame, min_length)
                )
        return orfs

    @staticmethod
    def _orfs_in_translation(protein: str, strand: int, frame: int,
                             min_length: int) -> List[Dict]:
        """Collect the ORFs of one translated frame given as a plain string."""
        found: List[Dict] = []
        start = protein.find("M")
        while start != -1:
            stop = protein.find("*", start + 1)
            if stop == -1:
                # No stop codon downstream, so no later start can close either.
                break
            orf_len = (stop - start) * 3
            if orf_len >= min_length:
                found.append({
                    "strand": strand,
                    "frame": frame,
                    "start": start * 3 + frame,
                    "end": stop * 3 + frame,
                    "length": orf_len,
                    "protein": protein[start:stop],
                })
            start = protein.find("M", start + 1)
        return found

    def find_motif(self, motif: str) -> List[int]:
        """Return the 0-based start of every (possibly overlapping) match.

        Args:
            motif: Exact motif to search for; it is upper-cased before
                matching.
        """
        text = str(self.sequence)
        needle = motif.upper()
        positions: List[int] = []
        hit = text.find(needle)
        while hit != -1:
            positions.append(hit)
            # Advance by one base, not by the motif length, to keep overlaps.
            hit = text.find(needle, hit + 1)
        return positions
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import numpy as np
class SequenceAligner:
    """Pairwise sequence alignment helpers built on ``pairwise2``."""

    @staticmethod
    def global_alignment(seq1: str, seq2: str,
                         match: float = 2,
                         mismatch: float = -1,
                         gap_open: float = -0.5,
                         gap_extend: float = -0.1):
        """Align two sequences end to end with ``pairwise2.align.globalms``.

        Args:
            seq1: First sequence.
            seq2: Second sequence.
            match: Score for identical characters.
            mismatch: Score for differing characters.
            gap_open: Score for opening a gap.
            gap_extend: Score for extending a gap.

        Returns:
            A dict with ``aligned_seq1``, ``aligned_seq2``, ``score`` and
            ``identity`` (percentage) for the first returned alignment.

        Raises:
            IndexError: If no alignment is returned.
        """
        alignments = pairwise2.align.globalms(
            seq1, seq2,
            match, mismatch,
            gap_open, gap_extend
        )
        return SequenceAligner._summarize(alignments)

    @staticmethod
    def local_alignment(seq1: str, seq2: str,
                        match: float = 2,
                        mismatch: float = -1,
                        gap_open: float = -0.5,
                        gap_extend: float = -0.1):
        """Align the best-matching regions with ``pairwise2.align.localms``.

        Takes the same arguments, returns the same dict and raises the same
        ``IndexError`` as ``global_alignment``.
        """
        alignments = pairwise2.align.localms(
            seq1, seq2,
            match, mismatch,
            gap_open, gap_extend
        )
        return SequenceAligner._summarize(alignments)

    @staticmethod
    def _summarize(alignments):
        """Turn the first alignment of ``alignments`` into the result dict."""
        if not alignments:
            # Same exception type as indexing an empty list, with a message.
            raise IndexError("no alignment was produced for the given sequences")
        best = alignments[0]
        return {
            "aligned_seq1": best.seqA,
            "aligned_seq2": best.seqB,
            "score": best.score,
            "identity": SequenceAligner._calculate_identity(best.seqA, best.seqB)
        }

    @staticmethod
    def _calculate_identity(seq1: str, seq2: str) -> float:
        """Return the percentage of columns holding the same non-gap symbol.

        The denominator is the length of the shorter input. Empty input
        yields 0.0 instead of a division by zero.
        """
        compared = min(len(seq1), len(seq2))
        if compared == 0:
            return 0.0
        matches = sum(1 for a, b in zip(seq1, seq2) if a == b and a != '-')
        return (matches / compared) * 100
from dataclasses import dataclass
from typing import Optional
@dataclass
class Variant:
    """Plain record describing one genetic variant call."""

    # Chromosome name; prefix of the label built by VariantAnnotator.annotate_variant.
    chromosome: str
    # Position on the chromosome (coordinate convention not shown here — TODO confirm).
    position: int
    # Reference allele; its length is compared with `alternate` to classify the variant.
    reference: str
    # Alternate allele.
    alternate: str
    # Call quality; copied unchanged into the annotation.
    quality: float
    # Genotype string; not read by any code visible in this file.
    genotype: str
    # Read depth; copied unchanged into the annotation.
    depth: int
    # Optional allele frequency; None means unknown.
    allele_frequency: Optional[float] = None
class VariantAnnotator:
    """Annotate variants with a type, a placeholder effect and a rarity class."""

    def __init__(self):
        # Reserved for gene-level annotation data; no method reads it yet.
        self.gene_annotations = {}

    def annotate_variant(self, variant: "Variant") -> Dict:
        """Build the annotation dict for ``variant``.

        Returns:
            A dict with ``variant`` (``chrom:posREF>ALT`` label), ``type``,
            ``effect``, ``quality`` and ``depth``. ``allele_frequency`` and
            ``rarity`` are added whenever a frequency is present.
        """
        annotation = {
            "variant": f"{variant.chromosome}:{variant.position}{variant.reference}>{variant.alternate}",
            "type": self._classify_variant_type(variant),
            "effect": self._predict_effect(variant),
            "quality": variant.quality,
            "depth": variant.depth
        }
        # Compare against None: a frequency of exactly 0.0 is a real value
        # and must still be reported and classified.
        if variant.allele_frequency is not None:
            annotation["allele_frequency"] = variant.allele_frequency
            annotation["rarity"] = self._classify_rarity(variant.allele_frequency)
        return annotation

    def _classify_variant_type(self, variant: "Variant") -> str:
        """Classify the variant from the lengths of its two alleles.

        Returns ``"SNV"`` for 1-to-1 changes, ``"INSERTION"`` when the
        alternate allele is longer, ``"DELETION"`` when it is shorter, and
        ``"INDEL"`` for equal-length alleles longer than one base.
        """
        ref_len = len(variant.reference)
        alt_len = len(variant.alternate)
        if ref_len == 1 and alt_len == 1:
            return "SNV"  # Single nucleotide variant
        if ref_len < alt_len:
            return "INSERTION"
        if ref_len > alt_len:
            return "DELETION"
        return "INDEL"

    def _predict_effect(self, variant: "Variant") -> str:
        """Return a placeholder effect label.

        Simplified: every SNV is labelled ``"MISSENSE"`` without checking
        coding context; everything else is ``"UNKNOWN"``.
        """
        if self._classify_variant_type(variant) == "SNV":
            # A real predictor would check coding region, stop gain, etc.
            return "MISSENSE"
        return "UNKNOWN"

    def _classify_rarity(self, af: float) -> str:
        """Bucket an allele frequency: >0.05 common, >0.01 low frequency, else rare."""
        if af > 0.05:
            return "COMMON"
        if af > 0.01:
            return "LOW_FREQUENCY"
        return "RARE"
import pandas as pd
import numpy as np
from scipy import stats
class RNASeqAnalyzer:
    """Analyze RNA-seq expression counts held in a pandas DataFrame."""

    def __init__(self, counts_matrix: pd.DataFrame):
        """
        counts_matrix: genes x samples matrix of raw counts
        """
        self.counts = counts_matrix
        self.normalized = None
        # Last table produced by differential_expression(); None until it runs.
        self.de_results = None

    def normalize_counts(self, method: str = "tpm"):
        """Normalize the count matrix and cache the result in ``self.normalized``.

        Args:
            method: ``"tpm"`` scales every sample (column) to sum to one
                million; ``"log2"`` applies ``log2(count + 1)``.

        Returns:
            The normalized DataFrame.

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        if method == "tpm":
            # NOTE: this is library-size scaling only (counts per million).
            # No gene-length correction is applied, so it is not a true TPM.
            self.normalized = (self.counts / self.counts.sum(axis=0)) * 1e6
        elif method == "log2":
            # +1 pseudocount keeps zero counts finite.
            self.normalized = np.log2(self.counts + 1)
        else:
            raise ValueError(f"unsupported normalization method: {method!r}")
        return self.normalized

    def differential_expression(self, condition1: List[str],
                                condition2: List[str],
                                method: str = "ttest") -> pd.DataFrame:
        """Compare two groups of samples gene by gene.

        Args:
            condition1: Column names of the baseline samples.
            condition2: Column names of the samples compared against them.
            method: Statistical test; only ``"ttest"`` is implemented.

        Returns:
            A DataFrame with one row per gene and the columns ``gene``,
            ``mean_condition1``, ``mean_condition2``, ``fold_change``,
            ``log2_fold_change``, ``p_value`` and ``significant``
            (``p_value < 0.05`` and ``|log2_fold_change| > 1``). The p-values
            are not corrected for multiple testing. The table is also kept in
            ``self.de_results``.

        Raises:
            ValueError: If ``method`` is not ``"ttest"``.
        """
        if method != "ttest":
            raise ValueError(f"unsupported differential expression method: {method!r}")

        columns = ["gene", "mean_condition1", "mean_condition2", "fold_change",
                   "log2_fold_change", "p_value", "significant"]
        if len(self.counts.index) == 0:
            results = pd.DataFrame(columns=columns)
            self.de_results = results
            return results

        group1 = self.counts.loc[:, condition1]
        group2 = self.counts.loc[:, condition2]
        mean1 = group1.mean(axis=1).to_numpy(dtype=float)
        mean2 = group2.mean(axis=1).to_numpy(dtype=float)

        # One vectorized test across all genes (rows) instead of a Python loop.
        _, p_values = stats.ttest_ind(
            group1.to_numpy(dtype=float), group2.to_numpy(dtype=float), axis=1
        )

        # Same pseudocount on both sides: the ratio is unbiased for equal
        # means and stays finite (no log2(0)) when condition2 is all zeros.
        fold_change = (mean2 + 1) / (mean1 + 1)
        log2fc = np.log2(fold_change)

        results = pd.DataFrame({
            "gene": self.counts.index.to_numpy(),
            "mean_condition1": mean1,
            "mean_condition2": mean2,
            "fold_change": fold_change,
            "log2_fold_change": log2fc,
            "p_value": p_values,
            "significant": (p_values < 0.05) & (np.abs(log2fc) > 1),
        }, columns=columns)
        self.de_results = results
        return results

    def identify_marker_genes(self, threshold_fc: float = 2,
                              threshold_pval: float = 0.05) -> List[str]:
        """Return genes passing fold-change and p-value cut-offs.

        Uses the table from the most recent ``differential_expression`` call.

        Args:
            threshold_fc: Minimum fold change in either direction (must be
                positive); compared as ``|log2_fold_change| >= log2(threshold_fc)``.
            threshold_pval: Genes need ``p_value`` strictly below this.

        Returns:
            Gene identifiers in the order of the results table.

        Raises:
            RuntimeError: If ``differential_expression`` has not been run.
        """
        table = getattr(self, "de_results", None)
        if table is None:
            raise RuntimeError(
                "run differential_expression() before identify_marker_genes()"
            )
        passes_fc = table["log2_fold_change"].abs() >= np.log2(threshold_fc)
        passes_p = table["p_value"] < threshold_pval
        return table.loc[passes_fc & passes_p, "gene"].tolist()
❌ 不对输入数据进行质量控制 ❌ 忽略批次效应 ❌ 不进行多重检验校正 ❌ 过度解读相关性 ❌ 样本量不足 ❌ 不验证计算预测 ❌ 忽略生物学背景
每周安装次数
53
代码仓库
GitHub 星标数
11
首次出现
2026 年 1 月 24 日
安全审计
安装于
opencode 46
codex 44
gemini-cli 41
cursor 39
github-copilot 38
kimi-cli 36
Expert guidance for biology, biotechnology, genetics, bioinformatics, and computational biology applications.
from Bio import SeqIO, Seq
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction, molecular_weight
from typing import Dict, List
class DNAAnalyzer:
    """Analyze a single DNA sequence with Biopython.

    The input string is upper-cased and wrapped in a ``Seq`` object; every
    method reads that object from ``self.sequence``.
    """

    def __init__(self, sequence: str):
        """Store ``sequence`` upper-cased as a ``Seq``."""
        self.sequence = Seq(sequence.upper())

    def basic_stats(self) -> Dict:
        """Return length, GC percentage, molecular weight and base counts.

        Returns:
            A dict with the keys ``length``, ``gc_content`` (the
            ``gc_fraction`` result multiplied by 100), ``molecular_weight``
            (computed by ``molecular_weight`` with sequence type ``"DNA"``)
            and ``nucleotide_counts``.
        """
        return {
            "length": len(self.sequence),
            "gc_content": gc_fraction(self.sequence) * 100,
            "molecular_weight": molecular_weight(self.sequence, "DNA"),
            "nucleotide_counts": self._count_nucleotides(),
        }

    def _count_nucleotides(self) -> Dict[str, int]:
        """Count occurrences of A, T, G and C (other symbols are ignored)."""
        return {base: self.sequence.count(base) for base in "ATGC"}

    def transcribe(self) -> str:
        """Return the RNA transcript of the sequence as a string."""
        return str(self.sequence.transcribe())

    def translate(self, table: int = 1) -> str:
        """Return the protein translation of the sequence as a string.

        Args:
            table: Codon table identifier forwarded to ``Seq.translate``.
        """
        return str(self.sequence.translate(table=table))

    def reverse_complement(self) -> str:
        """Return the reverse complement of the sequence as a string."""
        return str(self.sequence.reverse_complement())

    def find_orfs(self, min_length: int = 100) -> List[Dict]:
        """Find open reading frames in all six reading frames.

        Every ``M`` in a translated frame is treated as a start and is paired
        with the first ``*`` after it, so ORFs sharing a stop codon are all
        reported.

        Args:
            min_length: Minimum ORF length in nucleotides (stop codon
                excluded) for an ORF to be reported.

        Returns:
            One dict per ORF with the keys ``strand`` (+1 or -1), ``frame``
            (0-2), ``start``, ``end``, ``length`` and ``protein``. ``start``
            and ``end`` are nucleotide offsets into the scanned strand; for
            strand -1 that is the reverse complement, not the original
            sequence. ``end`` is the offset of the first base of the stop
            codon.
        """
        orfs: List[Dict] = []
        strands = [(+1, self.sequence), (-1, self.sequence.reverse_complement())]
        for strand, seq in strands:
            for frame in range(3):
                # Only translate whole codons: a dangling 1-2 base tail adds
                # nothing to the protein and makes Biopython emit a
                # partial-codon warning.
                usable = (len(seq) - frame) // 3 * 3
                if usable <= 0:
                    continue
                protein = str(seq[frame:frame + usable].translate(to_stop=False))
                orfs.extend(
                    self._orfs_in_translation(protein, strand, frame, min_length)
                )
        return orfs

    @staticmethod
    def _orfs_in_translation(protein: str, strand: int, frame: int,
                             min_length: int) -> List[Dict]:
        """Collect the ORFs of one translated frame given as a plain string."""
        found: List[Dict] = []
        start = protein.find("M")
        while start != -1:
            stop = protein.find("*", start + 1)
            if stop == -1:
                # No stop codon downstream, so no later start can close either.
                break
            orf_len = (stop - start) * 3
            if orf_len >= min_length:
                found.append({
                    "strand": strand,
                    "frame": frame,
                    "start": start * 3 + frame,
                    "end": stop * 3 + frame,
                    "length": orf_len,
                    "protein": protein[start:stop],
                })
            start = protein.find("M", start + 1)
        return found

    def find_motif(self, motif: str) -> List[int]:
        """Return the 0-based start of every (possibly overlapping) match.

        Args:
            motif: Exact motif to search for; it is upper-cased before
                matching.
        """
        text = str(self.sequence)
        needle = motif.upper()
        positions: List[int] = []
        hit = text.find(needle)
        while hit != -1:
            positions.append(hit)
            # Advance by one base, not by the motif length, to keep overlaps.
            hit = text.find(needle, hit + 1)
        return positions
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import numpy as np
class SequenceAligner:
    """Pairwise sequence alignment helpers built on ``pairwise2``."""

    @staticmethod
    def global_alignment(seq1: str, seq2: str,
                         match: float = 2,
                         mismatch: float = -1,
                         gap_open: float = -0.5,
                         gap_extend: float = -0.1):
        """Align two sequences end to end with ``pairwise2.align.globalms``.

        Args:
            seq1: First sequence.
            seq2: Second sequence.
            match: Score for identical characters.
            mismatch: Score for differing characters.
            gap_open: Score for opening a gap.
            gap_extend: Score for extending a gap.

        Returns:
            A dict with ``aligned_seq1``, ``aligned_seq2``, ``score`` and
            ``identity`` (percentage) for the first returned alignment.

        Raises:
            IndexError: If no alignment is returned.
        """
        alignments = pairwise2.align.globalms(
            seq1, seq2,
            match, mismatch,
            gap_open, gap_extend
        )
        return SequenceAligner._summarize(alignments)

    @staticmethod
    def local_alignment(seq1: str, seq2: str,
                        match: float = 2,
                        mismatch: float = -1,
                        gap_open: float = -0.5,
                        gap_extend: float = -0.1):
        """Align the best-matching regions with ``pairwise2.align.localms``.

        Takes the same arguments, returns the same dict and raises the same
        ``IndexError`` as ``global_alignment``.
        """
        alignments = pairwise2.align.localms(
            seq1, seq2,
            match, mismatch,
            gap_open, gap_extend
        )
        return SequenceAligner._summarize(alignments)

    @staticmethod
    def _summarize(alignments):
        """Turn the first alignment of ``alignments`` into the result dict."""
        if not alignments:
            # Same exception type as indexing an empty list, with a message.
            raise IndexError("no alignment was produced for the given sequences")
        best = alignments[0]
        return {
            "aligned_seq1": best.seqA,
            "aligned_seq2": best.seqB,
            "score": best.score,
            "identity": SequenceAligner._calculate_identity(best.seqA, best.seqB)
        }

    @staticmethod
    def _calculate_identity(seq1: str, seq2: str) -> float:
        """Return the percentage of columns holding the same non-gap symbol.

        The denominator is the length of the shorter input. Empty input
        yields 0.0 instead of a division by zero.
        """
        compared = min(len(seq1), len(seq2))
        if compared == 0:
            return 0.0
        matches = sum(1 for a, b in zip(seq1, seq2) if a == b and a != '-')
        return (matches / compared) * 100
from dataclasses import dataclass
from typing import Optional
@dataclass
class Variant:
    """Plain record describing one genetic variant call."""

    # Chromosome name; prefix of the label built by VariantAnnotator.annotate_variant.
    chromosome: str
    # Position on the chromosome (coordinate convention not shown here — TODO confirm).
    position: int
    # Reference allele; its length is compared with `alternate` to classify the variant.
    reference: str
    # Alternate allele.
    alternate: str
    # Call quality; copied unchanged into the annotation.
    quality: float
    # Genotype string; not read by any code visible in this file.
    genotype: str
    # Read depth; copied unchanged into the annotation.
    depth: int
    # Optional allele frequency; None means unknown.
    allele_frequency: Optional[float] = None
class VariantAnnotator:
    """Annotate variants with a type, a placeholder effect and a rarity class."""

    def __init__(self):
        # Reserved for gene-level annotation data; no method reads it yet.
        self.gene_annotations = {}

    def annotate_variant(self, variant: "Variant") -> Dict:
        """Build the annotation dict for ``variant``.

        Returns:
            A dict with ``variant`` (``chrom:posREF>ALT`` label), ``type``,
            ``effect``, ``quality`` and ``depth``. ``allele_frequency`` and
            ``rarity`` are added whenever a frequency is present.
        """
        annotation = {
            "variant": f"{variant.chromosome}:{variant.position}{variant.reference}>{variant.alternate}",
            "type": self._classify_variant_type(variant),
            "effect": self._predict_effect(variant),
            "quality": variant.quality,
            "depth": variant.depth
        }
        # Compare against None: a frequency of exactly 0.0 is a real value
        # and must still be reported and classified.
        if variant.allele_frequency is not None:
            annotation["allele_frequency"] = variant.allele_frequency
            annotation["rarity"] = self._classify_rarity(variant.allele_frequency)
        return annotation

    def _classify_variant_type(self, variant: "Variant") -> str:
        """Classify the variant from the lengths of its two alleles.

        Returns ``"SNV"`` for 1-to-1 changes, ``"INSERTION"`` when the
        alternate allele is longer, ``"DELETION"`` when it is shorter, and
        ``"INDEL"`` for equal-length alleles longer than one base.
        """
        ref_len = len(variant.reference)
        alt_len = len(variant.alternate)
        if ref_len == 1 and alt_len == 1:
            return "SNV"  # Single nucleotide variant
        if ref_len < alt_len:
            return "INSERTION"
        if ref_len > alt_len:
            return "DELETION"
        return "INDEL"

    def _predict_effect(self, variant: "Variant") -> str:
        """Return a placeholder effect label.

        Simplified: every SNV is labelled ``"MISSENSE"`` without checking
        coding context; everything else is ``"UNKNOWN"``.
        """
        if self._classify_variant_type(variant) == "SNV":
            # A real predictor would check coding region, stop gain, etc.
            return "MISSENSE"
        return "UNKNOWN"

    def _classify_rarity(self, af: float) -> str:
        """Bucket an allele frequency: >0.05 common, >0.01 low frequency, else rare."""
        if af > 0.05:
            return "COMMON"
        if af > 0.01:
            return "LOW_FREQUENCY"
        return "RARE"
import pandas as pd
import numpy as np
from scipy import stats
class RNASeqAnalyzer:
    """Analyze RNA-seq expression counts held in a pandas DataFrame."""

    def __init__(self, counts_matrix: pd.DataFrame):
        """
        counts_matrix: genes x samples matrix of raw counts
        """
        self.counts = counts_matrix
        self.normalized = None
        # Last table produced by differential_expression(); None until it runs.
        self.de_results = None

    def normalize_counts(self, method: str = "tpm"):
        """Normalize the count matrix and cache the result in ``self.normalized``.

        Args:
            method: ``"tpm"`` scales every sample (column) to sum to one
                million; ``"log2"`` applies ``log2(count + 1)``.

        Returns:
            The normalized DataFrame.

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        if method == "tpm":
            # NOTE: this is library-size scaling only (counts per million).
            # No gene-length correction is applied, so it is not a true TPM.
            self.normalized = (self.counts / self.counts.sum(axis=0)) * 1e6
        elif method == "log2":
            # +1 pseudocount keeps zero counts finite.
            self.normalized = np.log2(self.counts + 1)
        else:
            raise ValueError(f"unsupported normalization method: {method!r}")
        return self.normalized

    def differential_expression(self, condition1: List[str],
                                condition2: List[str],
                                method: str = "ttest") -> pd.DataFrame:
        """Compare two groups of samples gene by gene.

        Args:
            condition1: Column names of the baseline samples.
            condition2: Column names of the samples compared against them.
            method: Statistical test; only ``"ttest"`` is implemented.

        Returns:
            A DataFrame with one row per gene and the columns ``gene``,
            ``mean_condition1``, ``mean_condition2``, ``fold_change``,
            ``log2_fold_change``, ``p_value`` and ``significant``
            (``p_value < 0.05`` and ``|log2_fold_change| > 1``). The p-values
            are not corrected for multiple testing. The table is also kept in
            ``self.de_results``.

        Raises:
            ValueError: If ``method`` is not ``"ttest"``.
        """
        if method != "ttest":
            raise ValueError(f"unsupported differential expression method: {method!r}")

        columns = ["gene", "mean_condition1", "mean_condition2", "fold_change",
                   "log2_fold_change", "p_value", "significant"]
        if len(self.counts.index) == 0:
            results = pd.DataFrame(columns=columns)
            self.de_results = results
            return results

        group1 = self.counts.loc[:, condition1]
        group2 = self.counts.loc[:, condition2]
        mean1 = group1.mean(axis=1).to_numpy(dtype=float)
        mean2 = group2.mean(axis=1).to_numpy(dtype=float)

        # One vectorized test across all genes (rows) instead of a Python loop.
        _, p_values = stats.ttest_ind(
            group1.to_numpy(dtype=float), group2.to_numpy(dtype=float), axis=1
        )

        # Same pseudocount on both sides: the ratio is unbiased for equal
        # means and stays finite (no log2(0)) when condition2 is all zeros.
        fold_change = (mean2 + 1) / (mean1 + 1)
        log2fc = np.log2(fold_change)

        results = pd.DataFrame({
            "gene": self.counts.index.to_numpy(),
            "mean_condition1": mean1,
            "mean_condition2": mean2,
            "fold_change": fold_change,
            "log2_fold_change": log2fc,
            "p_value": p_values,
            "significant": (p_values < 0.05) & (np.abs(log2fc) > 1),
        }, columns=columns)
        self.de_results = results
        return results

    def identify_marker_genes(self, threshold_fc: float = 2,
                              threshold_pval: float = 0.05) -> List[str]:
        """Return genes passing fold-change and p-value cut-offs.

        Uses the table from the most recent ``differential_expression`` call.

        Args:
            threshold_fc: Minimum fold change in either direction (must be
                positive); compared as ``|log2_fold_change| >= log2(threshold_fc)``.
            threshold_pval: Genes need ``p_value`` strictly below this.

        Returns:
            Gene identifiers in the order of the results table.

        Raises:
            RuntimeError: If ``differential_expression`` has not been run.
        """
        table = getattr(self, "de_results", None)
        if table is None:
            raise RuntimeError(
                "run differential_expression() before identify_marker_genes()"
            )
        passes_fc = table["log2_fold_change"].abs() >= np.log2(threshold_fc)
        passes_p = table["p_value"] < threshold_pval
        return table.loc[passes_fc & passes_p, "gene"].tolist()
❌ No quality control of input data ❌ Ignoring batch effects ❌ No multiple testing correction ❌ Over-interpreting correlations ❌ Inadequate sample sizes ❌ Not validating computational predictions ❌ Ignoring biological context
Weekly Installs
53
Repository
GitHub Stars
11
First Seen
Jan 24, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass
Installed on
opencode 46
codex 44
gemini-cli 41
cursor 39
github-copilot 38
kimi-cli 36
marimo-batch:Python批处理任务神器,Pydantic声明式数据源与UI/CLI双模式
973 周安装
Azure Web PubSub Java SDK - 实时消息通信与WebSocket服务开发指南
1 周安装
Azure Key Vault Keys Rust 客户端库 - 安全密钥管理与加密解决方案
1 周安装
Azure Identity Java SDK - 微软Azure身份验证Java客户端库 | 安全云服务开发
1 周安装
Azure Event Hub Python 客户端库 - 实时数据流处理与云消息队列解决方案
1 周安装
Azure Event Grid .NET SDK - 事件驱动架构与云服务集成开发工具
1 周安装
PDForge API (PDF Noodle) - 从HTML/模板生成PDF和PNG的API服务
62 周安装