Tidy pipelines and structured output

# Chunk defaults for this vignette. Chunks are evaluated only when the
# LLMR_RUN_VIGNETTES environment variable equals "true" (case-insensitive);
# when it is unset or holds any other value, `eval` is FALSE.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  eval     = identical(
    tolower(Sys.getenv("LLMR_RUN_VIGNETTES", unset = "false")),
    "true"
  )
)

We will show both unstructured and structured pipelines, using four models, one per provider:

- gpt-5-nano (OpenAI)
- claude-sonnet-4-20250514 (Anthropic)
- gemini-2.5-flash (Gemini)
- openai/gpt-oss-20b (Groq)

You will need to set the environment variables OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, and GROQ_API_KEY. The code chunks are evaluated only when LLMR_RUN_VIGNETTES is set to "true".

library(LLMR)
library(dplyr)

# One configuration object per provider: provider id first, model name second.
cfg_openai <- llm_config(
  "openai",
  "gpt-5-nano"
)

# Anthropic requires max_tokens; setting it explicitly avoids warnings.
cfg_cld <- llm_config(
  "anthropic",
  "claude-sonnet-4-20250514",
  max_tokens = 512
)

cfg_gemini <- llm_config(
  "gemini",
  "gemini-2.5-flash"
)

cfg_groq <- llm_config(
  "groq",
  "openai/gpt-oss-20b"
)

llm_fn: unstructured (OpenAI)

# Words to classify.
words <- c(
  "excellent",
  "awful",
  "fine"
)

# Unstructured call using the OpenAI config. `{x}` in the prompt is the
# placeholder for the input element (per llm_fn's templating; see ?llm_fn).
out <- llm_fn(
  words,
  .config = cfg_openai,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral.",
  .return = "columns"
)

# Top-level expression: prints the result.
out

llm_fn: unstructured (Groq)

# Same classification prompt and inputs (`words`), this time using the Groq
# config instead of the OpenAI one.
out_groq <- llm_fn(
  words,
  .config = cfg_groq,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral.",
  .return = "columns"
)

# Top-level expression: prints the result.
out_groq

llm_fn_structured: schema-first (OpenAI)

# JSON-Schema-style description of the structured reply: an object with two
# required fields, a string `label` and a numeric `score`.
schema <- list(
  type = "object",
  properties = list(
    label = list(
      type        = "string",
      description = "Sentiment label"
    ),
    score = list(
      type        = "number",
      description = "Confidence 0..1"
    )
  ),
  required = list("label", "score"),
  # No keys beyond the two declared above.
  additionalProperties = FALSE
)

# Structured call: the reply is requested against `schema`, and the `label`
# and `score` fields are named explicitly via `.fields`.
out_s <- llm_fn_structured(
  x       = words,
  .config = cfg_openai,
  .schema = schema,
  .fields = c("label", "score"),
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral with confidence."
)

# Top-level expression: prints the result.
out_s

llm_mutate: unstructured (Anthropic)

# Small example table: an integer id and one free-text column per row.
df <- tibble::tibble(
  id = 1:3,
  text = c(
    "Cats are great pets",
    "The weather is bad",
    "I like tea"
  )
)

# Traditional form: the output column is given as a bare name (`answer`) and
# the prompt template is passed separately via `prompt`. `{text}` refers to
# the `text` column of `df`. The one-argument shorthand form is introduced
# in the following section.
df_u <- df |>
  llm_mutate(
    answer,
    prompt  = "Give a short category for: {text}",
    .config = cfg_cld,
    .return = "columns"
  )

# Top-level expression: prints the result.
df_u

llm_mutate: shorthand syntax (NEW in v0.6.2)

The shorthand lets you combine output column and prompt in one argument:

# Shorthand: the argument name (`category`) is the output column and its
# string value is the prompt template. The result is not assigned, so it is
# only printed.
df |>
  llm_mutate(
    category = "Give a short category for: {text}",
    .config = cfg_cld
  )
# Equivalent to: llm_mutate(category, prompt = "Give...", .config = cfg_cld)

Or with a named vector of role-tagged messages (here a system message and a user message):

# Shorthand with a named character vector as the value: the names (`system`,
# `user`) tag each message's role, and `classified` is the output column.
# The result is not assigned, so it is only printed.
df |>
  llm_mutate(
    classified = c(
      system = "You are a text classifier. One word only.",
      user = "Category for: {text}"
    ),
    .config = cfg_openai
  )

llm_mutate with .structured flag (NEW in v0.6.2)

You can now enable structured output directly in llm_mutate() using .structured = TRUE:

# Schema for the `.structured = TRUE` example: an object with two required
# fields, a string `category` and a numeric `confidence`.
# Note: this reassigns `schema`, replacing the label/score schema defined
# earlier in the vignette.
schema <- list(
  type = "object",
  properties = list(
    category = list(type = "string"),
    confidence = list(type = "number")
  ),
  required = list("category", "confidence"),
  # Matches the other schemas in this vignette; OpenAI's strict structured
  # output mode requires objects to set additionalProperties to false.
  additionalProperties = FALSE
)

# Using .structured = TRUE (equivalent to calling llm_mutate_structured).
# Shorthand form: `structured_result` is the output column and "{text}" is
# the whole prompt template; `.schema` passes the schema defined above.
# The result is not assigned, so it is only printed.
df |>
  llm_mutate(
    structured_result = "{text}",
    .config = cfg_openai,
    .structured = TRUE,
    .schema = schema
  )

This is equivalent to calling llm_mutate_structured() and supports all the same shorthand syntax.

llm_mutate_structured: structured with shorthand (Gemini)

# Schema for the Gemini example: an object with two required string fields,
# `category` and `rationale`.
schema2 <- list(
  type = "object",
  properties = list(
    category = list(
      type = "string"
    ),
    rationale = list(
      type = "string"
    )
  ),
  required = list("category", "rationale"),
  # No keys beyond the two declared above.
  additionalProperties = FALSE
)

# Traditional call: the output column is given as the bare name `annot` and
# the prompt template is passed separately via `prompt`.
df_s <- df |>
  llm_mutate_structured(
    annot,
    prompt  = "Extract category and a one-sentence rationale for: {text}",
    .config = cfg_gemini,
    .schema = schema2
    # Because a schema is present, fields auto-hoist; you can also pass:
    # .fields = c("category", "rationale")
  )

# Top-level expression: prints the result.
df_s

# Or use shorthand (NEW in v0.6.2): `annot = "<prompt>"` names the output
# column and supplies the prompt template in a single argument.
# The result is not assigned here, so it is only printed.
df |>
  llm_mutate_structured(
    annot = "Extract category and rationale for: {text}",
    .config = cfg_gemini,
    .schema = schema2
  )