natural-language by dpearson2699/swift-ios-skills
npx skills add https://github.com/dpearson2699/swift-ios-skills --skill natural-language分析自然语言文本,进行分词、词性标注、命名实体识别、情感分析、语言识别以及词/句向量嵌入。使用 Translation 框架在不同语言之间翻译文本。目标版本为 Swift 6.2 / iOS 26+。
此技能涵盖两个相关框架:用于设备端文本分析的 NaturalLanguage (
NLTokenizer,NLTagger,NLEmbedding) 和用于语言翻译的 Translation (TranslationSession,LanguageAvailability)。
导入 NaturalLanguage 用于文本分析,导入 用于语言翻译。NaturalLanguage 不需要特殊的权限或能力。Translation 需要 iOS 17.4+ / macOS 14.4+。
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
Translationimport NaturalLanguage
import Translation
NaturalLanguage 类 (NLTokenizer, NLTagger) 不是线程安全的。请确保每个实例在同一时间只在一个线程或调度队列中使用。
使用 NLTokenizer 将文本分割成单词、句子或段落。
import NaturalLanguage
func tokenizeWords(in text: String) -> [String] {
let tokenizer = NLTokenizer(unit: .word)
tokenizer.string = text
let range = text.startIndex..<text.endIndex
return tokenizer.tokens(for: range).map { String(text[$0]) }
}
| 单位 | 描述 |
|---|---|
.word | 单个单词 |
.sentence | 句子 |
.paragraph | 段落 |
.document | 整个文档 |
使用 enumerateTokens(in:using:) 来检测数字或表情符号标记。
let tokenizer = NLTokenizer(unit: .word)
tokenizer.string = text
tokenizer.enumerateTokens(in: text.startIndex..<text.endIndex) { range, attributes in
if attributes.contains(.numeric) {
print("Number: \(text[range])")
}
return true // 继续枚举
}
使用 NLLanguageRecognizer 检测字符串的主要语言。
func detectLanguage(for text: String) -> NLLanguage? {
NLLanguageRecognizer.dominantLanguage(for: text)
}
// 获取带有置信度分数的多种语言假设
func languageHypotheses(for text: String, max: Int = 5) -> [NLLanguage: Double] {
let recognizer = NLLanguageRecognizer()
recognizer.processString(text)
return recognizer.languageHypotheses(withMaximum: max)
}
将识别器限制在预期的语言范围内,可以提高短文本的识别准确度。
let recognizer = NLLanguageRecognizer()
recognizer.languageConstraints = [.english, .french, .spanish]
recognizer.processString(text)
let detected = recognizer.dominantLanguage
使用 NLTagger 识别名词、动词、形容词和其他词类。
func tagPartsOfSpeech(in text: String) -> [(String, NLTag)] {
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = text
var results: [(String, NLTag)] = []
let range = text.startIndex..<text.endIndex
let options: NLTagger.Options = [.omitPunctuation, .omitWhitespace]
tagger.enumerateTags(in: range, unit: .word, scheme: .lexicalClass, options: options) { tag, tokenRange in
if let tag {
results.append((String(text[tokenRange]), tag))
}
return true
}
return results
}
| 方案 | 输出 |
|---|---|
.lexicalClass | 词性(名词、动词、形容词) |
.nameType | 命名实体类型(人物、地点、组织) |
.nameTypeOrLexicalClass | 命名实体识别 + 词性标注组合 |
.lemma | 单词的基本形式 |
.language | 每个标记的语言 |
.sentimentScore | 情感极性分数 |
提取人物、地点和组织。
func extractEntities(from text: String) -> [(String, NLTag)] {
let tagger = NLTagger(tagSchemes: [.nameType])
tagger.string = text
var entities: [(String, NLTag)] = []
let options: NLTagger.Options = [.omitPunctuation, .omitWhitespace, .joinNames]
tagger.enumerateTags(
in: text.startIndex..<text.endIndex,
unit: .word,
scheme: .nameType,
options: options
) { tag, tokenRange in
if let tag, tag != .other {
entities.append((String(text[tokenRange]), tag))
}
return true
}
return entities
}
// NLTag 值: .personalName, .placeName, .organizationName
对文本情感进行评分,范围从 -1.0(负面)到 +1.0(正面)。
func sentimentScore(for text: String) -> Double? {
let tagger = NLTagger(tagSchemes: [.sentimentScore])
tagger.string = text
let (tag, _) = tagger.tag(
at: text.startIndex,
unit: .paragraph,
scheme: .sentimentScore
)
return tag.flatMap { Double($0.rawValue) }
}
使用 NLEmbedding 测量单词或句子之间的语义相似度。
func wordSimilarity(_ word1: String, _ word2: String) -> Double? {
guard let embedding = NLEmbedding.wordEmbedding(for: .english) else { return nil }
return embedding.distance(between: word1, and: word2, distanceType: .cosine)
}
func findSimilarWords(to word: String, count: Int = 5) -> [(String, Double)] {
guard let embedding = NLEmbedding.wordEmbedding(for: .english) else { return [] }
return embedding.neighbors(for: word, maximumCount: count, distanceType: .cosine)
}
句子嵌入用于比较整个句子。
func sentenceSimilarity(_ s1: String, _ s2: String) -> Double? {
guard let embedding = NLEmbedding.sentenceEmbedding(for: .english) else { return nil }
return embedding.distance(between: s1, and: s2, distanceType: .cosine)
}
使用 .translationPresentation() 显示内置的翻译界面。
import SwiftUI
import Translation
struct TranslatableView: View {
@State private var showTranslation = false
let text = "Hello, how are you?"
var body: some View {
Text(text)
.onTapGesture { showTranslation = true }
.translationPresentation(
isPresented: $showTranslation,
text: text
)
}
}
在视图上下文中使用 .translationTask() 进行编程式翻译。
struct TranslatingView: View {
@State private var translatedText = ""
@State private var configuration: TranslationSession.Configuration?
var body: some View {
VStack {
Text(translatedText)
Button("Translate") {
configuration = .init(source: Locale.Language(identifier: "en"),
target: Locale.Language(identifier: "es"))
}
}
.translationTask(configuration) { session in
let response = try await session.translate("Hello, world!")
translatedText = response.targetText
}
}
}
在单个会话中翻译多个字符串。
.translationTask(configuration) { session in
let requests = texts.enumerated().map { index, text in
TranslationSession.Request(sourceText: text,
clientIdentifier: "\(index)")
}
let responses = try await session.translations(from: requests)
for response in responses {
print("\(response.sourceText) -> \(response.targetText)")
}
}
let availability = LanguageAvailability()
let status = await availability.status(
from: Locale.Language(identifier: "en"),
to: Locale.Language(identifier: "ja")
)
switch status {
case .installed: break // 已安装,可离线翻译
case .supported: break // 支持,需要下载
case .unsupported: break // 不支持的语言对
}
这些类不是线程安全的,会导致结果错误或崩溃。
// 错误示例
let sharedTagger = NLTagger(tagSchemes: [.lexicalClass])
DispatchQueue.concurrentPerform(iterations: 10) { _ in
sharedTagger.string = someText // 数据竞争
}
// 正确示例
await withTaskGroup(of: Void.self) { group in
for _ in 0..<10 {
group.addTask {
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = someText
// 处理...
}
}
}
NaturalLanguage 提供内置的语言分析。使用 Core ML 处理自定义训练的模型。它们通过 NLModel 相互补充。
// 错误:尝试使用原始的 Core ML 进行命名实体识别
let coreMLModel = try MLModel(contentsOf: modelURL)
// 正确:使用 NLTagger 进行内置的命名实体识别
let tagger = NLTagger(tagSchemes: [.nameType])
// 或者通过 NLModel 加载自定义的 Core ML 模型
let nlModel = try NLModel(mlModel: coreMLModel)
tagger.setModels([nlModel], forTagScheme: .nameType)
并非所有语言在设备上都有可用的词或句嵌入向量。
// 错误:强制解包
let embedding = NLEmbedding.wordEmbedding(for: .japanese)!
// 正确:处理 nil 值
guard let embedding = NLEmbedding.wordEmbedding(for: .japanese) else {
// 该语言没有可用的嵌入向量
return
}
创建和配置标注器开销很大。对同一文本应复用标注器。
// 错误:为每个单词创建新的标注器
for word in words {
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = word
}
// 正确:设置一次字符串,然后枚举
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = fullText
tagger.enumerateTags(in: fullText.startIndex..<fullText.endIndex,
unit: .word, scheme: .lexicalClass, options: []) { tag, range in
return true
}
短字符串(约 20 个字符以下)的语言检测不可靠。设置约束或提示以提高准确度。
// 错误:检测单个单词的语言
let lang = NLLanguageRecognizer.dominantLanguage(for: "chat") // 法语还是英语?
// 正确:提供上下文
let recognizer = NLLanguageRecognizer()
recognizer.languageHints = [.english: 0.8, .french: 0.2]
recognizer.processString("chat")
NLTokenizer 和 NLTagger 实例在单个线程中使用NLEmbedding 前检查了可用性(如果不可用则返回 nil)LanguageAvailability.translationTask() 在 SwiftUI 视图层次结构中使用clientIdentifier 来匹配响应和请求.joinNames 选项以保持多词名称在一起NLModel 加载,而不是原始的 Core MLreferences/translation-patterns.md每周安装量
333
代码仓库
GitHub 星标数
269
首次出现
2026年3月8日
安全审计
安装于
codex330
opencode327
github-copilot327
amp327
cline327
kimi-cli327
Analyze natural language text for tokenization, part-of-speech tagging, named entity recognition, sentiment analysis, language identification, and word/sentence embeddings. Translate text between languages with the Translation framework. Targets Swift 6.2 / iOS 26+.
This skill covers two related frameworks: NaturalLanguage (
NLTokenizer,NLTagger,NLEmbedding) for on-device text analysis, and Translation (TranslationSession,LanguageAvailability) for language translation.
Import NaturalLanguage for text analysis and Translation for language translation. No special entitlements or capabilities are required for NaturalLanguage. Translation requires iOS 17.4+ / macOS 14.4+.
import NaturalLanguage
import Translation
NaturalLanguage classes (NLTokenizer, NLTagger) are not thread-safe. Use each instance from one thread or dispatch queue at a time.
Segment text into words, sentences, or paragraphs with NLTokenizer.
import NaturalLanguage
func tokenizeWords(in text: String) -> [String] {
let tokenizer = NLTokenizer(unit: .word)
tokenizer.string = text
let range = text.startIndex..<text.endIndex
return tokenizer.tokens(for: range).map { String(text[$0]) }
}
| Unit | Description |
|---|---|
.word | Individual words |
.sentence | Sentences |
.paragraph | Paragraphs |
.document | Entire document |
Use enumerateTokens(in:using:) to detect numeric or emoji tokens.
let tokenizer = NLTokenizer(unit: .word)
tokenizer.string = text
tokenizer.enumerateTokens(in: text.startIndex..<text.endIndex) { range, attributes in
if attributes.contains(.numeric) {
print("Number: \(text[range])")
}
return true // continue enumeration
}
Detect the dominant language of a string with NLLanguageRecognizer.
func detectLanguage(for text: String) -> NLLanguage? {
NLLanguageRecognizer.dominantLanguage(for: text)
}
// Multiple hypotheses with confidence scores
func languageHypotheses(for text: String, max: Int = 5) -> [NLLanguage: Double] {
let recognizer = NLLanguageRecognizer()
recognizer.processString(text)
return recognizer.languageHypotheses(withMaximum: max)
}
Constrain the recognizer to expected languages for better accuracy on short text.
let recognizer = NLLanguageRecognizer()
recognizer.languageConstraints = [.english, .french, .spanish]
recognizer.processString(text)
let detected = recognizer.dominantLanguage
Identify nouns, verbs, adjectives, and other lexical classes with NLTagger.
func tagPartsOfSpeech(in text: String) -> [(String, NLTag)] {
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = text
var results: [(String, NLTag)] = []
let range = text.startIndex..<text.endIndex
let options: NLTagger.Options = [.omitPunctuation, .omitWhitespace]
tagger.enumerateTags(in: range, unit: .word, scheme: .lexicalClass, options: options) { tag, tokenRange in
if let tag {
results.append((String(text[tokenRange]), tag))
}
return true
}
return results
}
| Scheme | Output |
|---|---|
.lexicalClass | Part of speech (noun, verb, adjective) |
.nameType | Named entity type (person, place, organization) |
.nameTypeOrLexicalClass | Combined NER + POS |
.lemma | Base form of a word |
.language | Per-token language |
.sentimentScore | Sentiment polarity score |
Extract people, places, and organizations.
func extractEntities(from text: String) -> [(String, NLTag)] {
let tagger = NLTagger(tagSchemes: [.nameType])
tagger.string = text
var entities: [(String, NLTag)] = []
let options: NLTagger.Options = [.omitPunctuation, .omitWhitespace, .joinNames]
tagger.enumerateTags(
in: text.startIndex..<text.endIndex,
unit: .word,
scheme: .nameType,
options: options
) { tag, tokenRange in
if let tag, tag != .other {
entities.append((String(text[tokenRange]), tag))
}
return true
}
return entities
}
// NLTag values: .personalName, .placeName, .organizationName
Score text sentiment from -1.0 (negative) to +1.0 (positive).
func sentimentScore(for text: String) -> Double? {
let tagger = NLTagger(tagSchemes: [.sentimentScore])
tagger.string = text
let (tag, _) = tagger.tag(
at: text.startIndex,
unit: .paragraph,
scheme: .sentimentScore
)
return tag.flatMap { Double($0.rawValue) }
}
Measure semantic similarity between words or sentences with NLEmbedding.
func wordSimilarity(_ word1: String, _ word2: String) -> Double? {
guard let embedding = NLEmbedding.wordEmbedding(for: .english) else { return nil }
return embedding.distance(between: word1, and: word2, distanceType: .cosine)
}
func findSimilarWords(to word: String, count: Int = 5) -> [(String, Double)] {
guard let embedding = NLEmbedding.wordEmbedding(for: .english) else { return [] }
return embedding.neighbors(for: word, maximumCount: count, distanceType: .cosine)
}
Sentence embeddings compare entire sentences.
func sentenceSimilarity(_ s1: String, _ s2: String) -> Double? {
guard let embedding = NLEmbedding.sentenceEmbedding(for: .english) else { return nil }
return embedding.distance(between: s1, and: s2, distanceType: .cosine)
}
Show the built-in translation UI with .translationPresentation().
import SwiftUI
import Translation
struct TranslatableView: View {
@State private var showTranslation = false
let text = "Hello, how are you?"
var body: some View {
Text(text)
.onTapGesture { showTranslation = true }
.translationPresentation(
isPresented: $showTranslation,
text: text
)
}
}
Use .translationTask() for programmatic translations within a view context.
struct TranslatingView: View {
@State private var translatedText = ""
@State private var configuration: TranslationSession.Configuration?
var body: some View {
VStack {
Text(translatedText)
Button("Translate") {
configuration = .init(source: Locale.Language(identifier: "en"),
target: Locale.Language(identifier: "es"))
}
}
.translationTask(configuration) { session in
let response = try await session.translate("Hello, world!")
translatedText = response.targetText
}
}
}
Translate multiple strings in a single session.
.translationTask(configuration) { session in
let requests = texts.enumerated().map { index, text in
TranslationSession.Request(sourceText: text,
clientIdentifier: "\(index)")
}
let responses = try await session.translations(from: requests)
for response in responses {
print("\(response.sourceText) -> \(response.targetText)")
}
}
let availability = LanguageAvailability()
let status = await availability.status(
from: Locale.Language(identifier: "en"),
to: Locale.Language(identifier: "ja")
)
switch status {
case .installed: break // Ready to translate offline
case .supported: break // Needs download
case .unsupported: break // Language pair not available
}
These classes are not thread-safe and will produce incorrect results or crash.
// WRONG
let sharedTagger = NLTagger(tagSchemes: [.lexicalClass])
DispatchQueue.concurrentPerform(iterations: 10) { _ in
sharedTagger.string = someText // Data race
}
// CORRECT
await withTaskGroup(of: Void.self) { group in
for _ in 0..<10 {
group.addTask {
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = someText
// process...
}
}
}
NaturalLanguage provides built-in linguistic analysis. Use Core ML for custom trained models. They complement each other via NLModel.
// WRONG: Trying to do NER with raw Core ML
let coreMLModel = try MLModel(contentsOf: modelURL)
// CORRECT: Use NLTagger for built-in NER
let tagger = NLTagger(tagSchemes: [.nameType])
// Or load a custom Core ML model via NLModel
let nlModel = try NLModel(mlModel: coreMLModel)
tagger.setModels([nlModel], forTagScheme: .nameType)
Not all languages have word or sentence embeddings available on device.
// WRONG: Force unwrap
let embedding = NLEmbedding.wordEmbedding(for: .japanese)!
// CORRECT: Handle nil
guard let embedding = NLEmbedding.wordEmbedding(for: .japanese) else {
// Embedding not available for this language
return
}
Creating and configuring a tagger is expensive. Reuse it for the same text.
// WRONG: New tagger per word
for word in words {
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = word
}
// CORRECT: Set string once, enumerate
let tagger = NLTagger(tagSchemes: [.lexicalClass])
tagger.string = fullText
tagger.enumerateTags(in: fullText.startIndex..<fullText.endIndex,
unit: .word, scheme: .lexicalClass, options: []) { tag, range in
return true
}
Language detection on short strings (under ~20 characters) is unreliable. Set constraints or hints to improve accuracy.
// WRONG: Detect language of a single word
let lang = NLLanguageRecognizer.dominantLanguage(for: "chat") // French or English?
// CORRECT: Provide context
let recognizer = NLLanguageRecognizer()
recognizer.languageHints = [.english: 0.8, .french: 0.2]
recognizer.processString("chat")
NLTokenizer and NLTagger instances used from a single threadNLEmbedding availability checked before use (returns nil if unavailable)LanguageAvailability checked before attempting translation.translationTask() used within a SwiftUI view hierarchyclientIdentifier to match responses to requests.joinNames option used with NER to keep multi-word names togetherNLModel, not raw Core MLreferences/translation-patterns.mdWeekly Installs
333
Repository
GitHub Stars
269
First Seen
Mar 8, 2026
Security Audits
Gen Agent Trust HubPassSocketPassSnykPass
Installed on
codex330
opencode327
github-copilot327
amp327
cline327
kimi-cli327
Novel Writer 小说创作工作流指南:7步系统化方法,AI辅助写作与质量分析
942 周安装
SvelteKit 项目结构详解:文件命名、路由布局与错误处理最佳实践
326 周安装
Slack API 操作工具 - 消息管理、回应、置顶与成员信息查询 | OpenClaw 机器人集成
326 周安装
GrepAI 高级搜索选项:JSON/TOON 输出、紧凑模式与 AI 代理集成指南
326 周安装
Snowflake平台技能:使用CLI、Cortex AI函数和Snowpark构建AI数据云应用
326 周安装
AI代码实施计划编写工具 - 遵循TDD原则的详细开发任务分解指南
326 周安装
Cloudflare Agents SDK:构建AI驱动的自主智能体,支持可恢复流式传输与持久化状态
326 周安装