jjstreamgraph: Comprehensive Interactive Stream Graph Visualization
ClinicoPath
2025-07-13
Source:vignettes/jjstatsplot-32-jjstreamgraph-comprehensive.Rmd
jjstatsplot-32-jjstreamgraph-comprehensive.Rmd
Introduction to jjstreamgraph
The jjstreamgraph function is a powerful wrapper around the streamgraph
R package that creates interactive, flowing
stream graphs for visualizing time series data with multiple categories.
Stream graphs are particularly effective for showing changes in
composition over time, making them ideal for financial data, web
analytics, epidemiological trends, and any multi-category time series
analysis.
Key Features
- Interactive Visualization: Mouse-over effects and dynamic highlighting
- Multiple Layout Options: Silhouette, zero baseline, and expanded views
- Flexible Interpolation: Smooth, linear, or step interpolation between data points
- Color Palettes: Multiple ColorBrewer palettes for optimal data distinction
- Performance Optimized: Enhanced caching and data preparation for faster rendering
- Comprehensive Error Handling: Robust validation and user-friendly error messages
Basic Stream Graph Creation
Simple Time Series Example
Let’s start with a basic stream graph showing product sales over time:
# Simulated monthly sales for four products over one year (long format:
# one row per month-product pair). Values are floored at 1 so that every
# product keeps a positive value in the stream graph.
sales_data <- data.frame(
  month = rep(1:12, times = 4),
  sales = pmax(
    c(
      cumsum(rnorm(12, mean = 5, sd = 2)) + 20,   # Product A
      cumsum(rnorm(12, mean = 3, sd = 1.5)) + 15, # Product B
      cumsum(rnorm(12, mean = 4, sd = 1.8)) + 18, # Product C
      cumsum(rnorm(12, mean = 2, sd = 1)) + 10    # Product D
    ),
    1
  ),
  product = factor(
    rep(c("Product A", "Product B", "Product C", "Product D"), each = 12)
  )
)

# Basic stream graph: silhouette offset with smooth (cardinal) interpolation
result_basic <- jjstreamgraph(
  data        = sales_data,
  timeVar     = "month",
  valueVar    = "sales",
  groupVar    = "product",
  offset      = "silhouette",
  interpolate = "cardinal",
  palette     = "Blues"
)
print(result_basic)
Understanding Stream Graph Components
Time Variable (X-axis)
- Represents the temporal dimension
- Can be numeric (months, years, quarters) or dates (the date example later in this vignette converts dates to numeric before plotting)
- Should have consistent intervals for best results
Layout and Styling Options
Different Offset Types
The offset parameter controls how streams are positioned relative to each other:
Silhouette Layout (Default)
# Silhouette layout of the product sales data
result_silhouette <- jjstreamgraph(
  data     = sales_data,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  offset   = "silhouette",
  palette  = "Set1"
)
print(result_silhouette)
Zero Baseline Layout
# Zero-baseline layout of the product sales data
result_zero <- jjstreamgraph(
  data     = sales_data,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  offset   = "zero",
  palette  = "Greens"
)
print(result_zero)
Expanded (Percentage) Layout
# Expanded (percentage) layout of the product sales data
result_expand <- jjstreamgraph(
  data     = sales_data,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  offset   = "expand",
  palette  = "Reds"
)
print(result_expand)
Interpolation Methods
Smooth (Cardinal) Interpolation
# Smooth curves between data points via cardinal interpolation
result_smooth <- jjstreamgraph(
  data        = sales_data,
  timeVar     = "month",
  valueVar    = "sales",
  groupVar    = "product",
  interpolate = "cardinal",
  palette     = "Pastel1"
)
print(result_smooth)
Linear Interpolation
# Linear interpolation between data points
result_linear <- jjstreamgraph(
  data        = sales_data,
  timeVar     = "month",
  valueVar    = "sales",
  groupVar    = "product",
  interpolate = "linear",
  palette     = "Set1"
)
print(result_linear)
Step Interpolation
# Step interpolation between data points
result_step <- jjstreamgraph(
  data        = sales_data,
  timeVar     = "month",
  valueVar    = "sales",
  groupVar    = "product",
  interpolate = "step",
  palette     = "Blues"
)
print(result_step)
Color Palettes
Available Palette Options
# Palette names demonstrated in this vignette
palettes <- c("Blues", "Greens", "Reds", "Pastel1", "Set1")

# Draw the same data once per palette; only the first three are shown to
# keep the output short.
for (pal_name in head(palettes, 3)) {
  cat("### Palette:", pal_name, "\n")
  result_palette <- jjstreamgraph(
    data     = sales_data,
    timeVar  = "month",
    valueVar = "sales",
    groupVar = "product",
    palette  = pal_name
  )
  print(result_palette)
}
Real-World Applications
Financial Data Visualization
# Simulated quarterly revenue for five sectors: 20 quarters (5 years) per
# sector, stored in long format.
financial_data <- data.frame(
  quarter = rep(1:20, times = 5),
  revenue = c(
    cumsum(rnorm(20, mean = 8, sd = 3)) + 200,   # Technology
    cumsum(rnorm(20, mean = 6, sd = 2.5)) + 150, # Healthcare
    cumsum(rnorm(20, mean = 4, sd = 2)) + 100,   # Energy
    cumsum(rnorm(20, mean = 5, sd = 2.2)) + 120, # Finance
    cumsum(rnorm(20, mean = 3, sd = 1.8)) + 80   # Consumer
  ),
  sector = factor(
    rep(
      c("Technology", "Healthcare", "Energy", "Finance", "Consumer"),
      each = 20
    )
  )
)

# Zero-baseline stream graph with explicit widget dimensions
financial_result <- jjstreamgraph(
  data        = financial_data,
  timeVar     = "quarter",
  valueVar    = "revenue",
  groupVar    = "sector",
  offset      = "zero",
  interpolate = "cardinal",
  palette     = "Set1",
  width       = "1000px",
  height      = "600px"
)
print(financial_result)
Web Analytics Dashboard
# Simulated weekly visitor counts for six traffic sources over 52 weeks.
# Each series is a random walk, so it is floored at 1 to stay positive.
web_data <- data.frame(
  week = rep(1:52, times = 6),
  visitors = pmax(
    c(
      500 + cumsum(rnorm(52, mean = 2, sd = 10)),   # Organic Search
      300 + cumsum(rnorm(52, mean = 1, sd = 8)),    # Direct
      200 + cumsum(rnorm(52, mean = 0.5, sd = 12)), # Social Media
      150 + cumsum(rnorm(52, mean = 1.2, sd = 6)),  # Email
      100 + cumsum(rnorm(52, mean = 0.8, sd = 5)),  # Paid Ads
      80 + cumsum(rnorm(52, mean = 0.3, sd = 4))    # Referral
    ),
    1
  ),
  source = factor(
    rep(
      c("Organic Search", "Direct", "Social Media",
        "Email", "Paid Ads", "Referral"),
      each = 52
    )
  )
)

web_result <- jjstreamgraph(
  data     = web_data,
  timeVar  = "week",
  valueVar = "visitors",
  groupVar = "source",
  offset   = "silhouette",
  palette  = "Blues"
)
print(web_result)
Clinical Research Applications
# Simulated cumulative patient enrollment at four sites over 26 weeks
# (about six months). Weekly enrollments are Poisson draws; cumsum() turns
# them into running totals.
clinical_data <- data.frame(
  week = rep(1:26, times = 4),
  patients = c(
    cumsum(rpois(26, lambda = 3)), # Site 1
    cumsum(rpois(26, lambda = 4)), # Site 2
    cumsum(rpois(26, lambda = 2)), # Site 3
    cumsum(rpois(26, lambda = 5))  # Site 4
  ),
  site = factor(rep(paste("Site", 1:4), each = 26))
)

clinical_result <- jjstreamgraph(
  data        = clinical_data,
  timeVar     = "week",
  valueVar    = "patients",
  groupVar    = "site",
  offset      = "zero",
  interpolate = "linear",
  palette     = "Greens"
)
print(clinical_result)
Advanced Customization
Custom Dimensions
# Same sales data, drawn at an explicit width and height
result_custom_size <- jjstreamgraph(
  data     = sales_data,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  width    = "1200px",
  height   = "700px",
  palette  = "Set1"
)
print(result_custom_size)
Working with Date Variables
# Three categories observed on the first day of each month in 2023
date_data <- data.frame(
  date = rep(
    seq(as.Date("2023-01-01"), as.Date("2023-12-01"), by = "month"),
    times = 3
  ),
  metric = c(
    cumsum(rnorm(12, mean = 2, sd = 1)) + 15,     # Metric A
    cumsum(rnorm(12, mean = 1.5, sd = 0.8)) + 12, # Metric B
    cumsum(rnorm(12, mean = 1, sd = 0.6)) + 10    # Metric C
  ),
  category = factor(
    rep(c("Category A", "Category B", "Category C"), each = 12)
  )
)

# The time variable is passed as a number, so store the dates as their
# numeric representation in a separate column.
date_data[["date_numeric"]] <- as.numeric(date_data[["date"]])

date_result <- jjstreamgraph(
  data     = date_data,
  timeVar  = "date_numeric",
  valueVar = "metric",
  groupVar = "category",
  offset   = "silhouette",
  palette  = "Blues"
)
print(date_result)
Data Preparation and Best Practices
Data Requirements
1. Time Series Structure
- Data should be in long format (one row per time-category combination)
- Consistent time intervals work best
- Missing time points can cause visual discontinuities
2. Value Preparation
# Raw example data: group "B" is centred below zero on purpose, so the
# preparation step has negative values to deal with.
raw_data <- data.frame(
  time = rep(1:10, times = 3),
  value = c(
    rnorm(10, mean = 5, sd = 2),
    rnorm(10, mean = -2, sd = 1),
    rnorm(10, mean = 8, sd = 3)
  ),
  group = factor(rep(c("A", "B", "C"), each = 10))
)

# Stream graphs work best with positive values, so:
#   1. floor every value at 0.1;
#   2. optionally clamp the floored values to their overall 5th-95th
#      percentile range to tame extremes.
prepared_data <- raw_data %>%
  mutate(value_positive = pmax(value, 0.1)) %>%
  mutate(
    value_smoothed = pmin(value_positive, quantile(value_positive, 0.95)),
    value_smoothed = pmax(value_smoothed, quantile(value_positive, 0.05))
  )
head(prepared_data)
3. Category Management
# Relabel the generic product codes with descriptive names. `levels` and
# `labels` are matched by position.
sales_data_clean <- mutate(
  sales_data,
  product = factor(
    product,
    levels = c("Product A", "Product B", "Product C", "Product D"),
    labels = c("Premium Line", "Standard Line", "Budget Line", "Specialty Line")
  )
)

result_clean <- jjstreamgraph(
  data     = sales_data_clean,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  palette  = "Set1"
)
print(result_clean)
Troubleshooting Common Issues
Handling Missing Values
# Copy the sales data and blank out three sales values
data_with_na <- sales_data
data_with_na[c(5, 15, 25), "sales"] <- NA

# No manual NA removal here: per this vignette, the function handles
# missing values automatically.
result_na <- jjstreamgraph(
  data     = data_with_na,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product"
)
print(result_na)
Dealing with Irregular Time Series
# One series observed at unevenly spaced time points
irregular_data <- data.frame(
  time  = c(1, 3, 5, 8, 12, 15, 20, 25, 30, 35),
  value = cumsum(rnorm(10, mean = 2, sd = 1)) + 10,
  group = "Single Group"
)

# A stream graph needs several groups: add two scaled copies of the series
irregular_expanded <- bind_rows(
  irregular_data,
  mutate(irregular_data, group = "Group B", value = value * 0.8),
  mutate(irregular_data, group = "Group C", value = value * 1.2)
)

result_irregular <- jjstreamgraph(
  data        = irregular_expanded,
  timeVar     = "time",
  valueVar    = "value",
  groupVar    = "group",
  interpolate = "linear" # Linear works better for irregular intervals
)
print(result_irregular)
Performance Considerations
Large Dataset Handling
The function includes several performance optimizations:
# Larger dataset for a rough timing check: 365 daily values for each of
# 8 categories, generated one category at a time.
large_data <- data.frame(
  time = rep(1:365, times = 8),
  value = unlist(
    lapply(seq_len(8), function(i) cumsum(rnorm(365, mean = 1, sd = 2)) + 20)
  ),
  category = factor(rep(paste("Category", 1:8), each = 365))
)

# Time only the jjstreamgraph() call itself; print() happens afterwards
start_time <- Sys.time()
performance_result <- jjstreamgraph(
  data     = large_data,
  timeVar  = "time",
  valueVar = "value",
  groupVar = "category",
  palette  = "Set1"
)
end_time <- Sys.time()

cat("Rendering time:", difftime(end_time, start_time, units = "secs"), "seconds\n")
print(performance_result)
Optimization Features
The function implements several performance enhancements:
- Data Preparation Caching: Processed data is cached to avoid recomputation
- Option Preprocessing: Common option processing is done once and cached
- Hash-based Change Detection: Only reprocesses data when inputs change
- Efficient Memory Usage: Minimizes data copying and transformation overhead
- Error Handling: Comprehensive validation prevents unnecessary processing
Comparison with Other Visualization Types
When to Use Stream Graphs
✅ Ideal Use Cases:
- Multi-category time series: When you have 3+ categories changing over time
- Composition analysis: Understanding how proportions change over time
- Flow visualization: Showing smooth transitions and trends
- Interactive exploration: When users need to hover and explore data points
⚠️ Consider Alternatives When:
- Precise value reading: Line charts may be better for exact values
- Few categories (< 3): Simple line or area charts might be clearer
- Static presentations: Stream graphs lose impact without interactivity
- Negative values: Consider transforming data or using different chart types
Stream Graph vs. Stacked Area Chart
# Silhouette offset: emphasizes flow and composition
stream_result <- jjstreamgraph(
  data     = sales_data,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  offset   = "silhouette",
  palette  = "Blues"
)

# Zero offset: reads much like a conventional stacked area chart
stacked_result <- jjstreamgraph(
  data     = sales_data,
  timeVar  = "month",
  valueVar = "sales",
  groupVar = "product",
  offset   = "zero",
  palette  = "Blues"
)

print(stream_result)
print(stacked_result)
Best Practices and Recommendations
Design Guidelines
- Color Selection: Use ColorBrewer palettes for accessibility and clarity
- Category Limits: 3-8 categories work best; too many become cluttered
- Time Granularity: Choose appropriate time intervals for your data story
- Interpolation Choice: Smooth for trends, linear for accurate representation
- Layout Selection: Silhouette for emphasis on overall patterns, zero for baseline comparison
Data Quality Tips
# Check data quality before visualization
#
# Prints a short diagnostic report for a long-format data set: row count,
# missing values per variable, number of groups, the time and value ranges,
# and a warning line when negative values are present.
#
# Arguments:
#   data      A data frame.
#   time_var  Name (string) of the time column.
#   value_var Name (string) of the value column.
#   group_var Name (string) of the grouping column.
#
# Returns NULL invisibly; the function is called for its printed output.
check_data_quality <- function(data, time_var, value_var, group_var) {
  time_col <- data[[time_var]]
  value_col <- data[[value_var]]
  group_col <- data[[group_var]]

  # Format a "min to max" range. format() keeps Date/POSIXct values readable
  # (cat() alone would print their underlying numbers), and the length check
  # avoids min()/max() warnings on empty or all-NA columns.
  describe_range <- function(x) {
    x <- x[!is.na(x)]
    if (length(x) == 0) {
      return("NA to NA")
    }
    paste(format(min(x)), "to", format(max(x)))
  }

  cat("Data Quality Check:\n")
  cat("- Total rows:", nrow(data), "\n")
  cat("- Missing values in time variable:", sum(is.na(time_col)), "\n")
  cat("- Missing values in value variable:", sum(is.na(value_col)), "\n")
  cat("- Missing values in group variable:", sum(is.na(group_col)), "\n")
  cat("- Number of groups:", length(unique(group_col)), "\n")
  cat("- Time range:", describe_range(time_col), "\n")
  cat("- Value range:", describe_range(value_col), "\n")

  negative_values <- sum(value_col < 0, na.rm = TRUE)
  if (negative_values > 0) {
    cat("- WARNING:", negative_values, "negative values found (consider transformation)\n")
  }

  invisible(NULL)
}
# Example usage
check_data_quality(sales_data, "month", "sales", "product")
Performance Tips
- Data Size: For large datasets (>10k rows), consider aggregating or sampling
- Time Intervals: Regular intervals perform better than irregular ones
- Category Count: Limit to 8-10 categories for optimal performance
- Browser Considerations: Stream graphs are interactive and require modern browsers
Integration with Clinical Workflows
Example: Disease Surveillance
# Simulated weekly case counts for four diseases over one year. Each series
# uses a different Poisson rate pattern to mimic a different epidemic shape.
surveillance_data <- data.frame(
  week = rep(1:52, times = 4),
  cases = c(
    # Seasonal disease A: one sine cycle over the 52 weeks
    rpois(52, lambda = 5 + 3 * sin(2 * pi * (1:52) / 52)),
    # Seasonal disease B: two sine cycles over the 52 weeks
    rpois(52, lambda = 3 + 2 * sin(2 * pi * (1:52) / 26)),
    # Endemic disease C: constant rate
    rpois(52, lambda = 2),
    # Outbreak disease D: flat for 30 weeks, then a 22-week surge
    c(rep(1, 30), rpois(22, 15))
  ),
  disease = factor(
    rep(c("Influenza", "RSV", "COVID-19", "Norovirus"), each = 52)
  )
)

surveillance_result <- jjstreamgraph(
  data     = surveillance_data,
  timeVar  = "week",
  valueVar = "cases",
  groupVar = "disease",
  offset   = "zero",
  palette  = "Reds",
  width    = "1000px",
  height   = "500px"
)
print(surveillance_result)
Troubleshooting Guide
Common Error Messages
“Data contains no (complete) rows”
- Cause: All rows have missing values in required variables
- Solution: Check for missing data and clean before visualization
Data Format Validation
# Function to validate data format
#
# Checks that a data frame is usable for a stream graph: the three variables
# exist, the time and value variables are numeric, there are at least 3
# complete rows, and those complete rows span at least 2 groups.
#
# Arguments:
#   data      A data frame.
#   time_var  Name (string) of the time column.
#   value_var Name (string) of the value column.
#   group_var Name (string) of the grouping column.
#
# Returns the string "Data validation passed!" when every check succeeds,
# otherwise a character vector with one message per failed check. If any
# variable is missing, only the "not found" messages are returned, because
# the remaining checks need all three columns.
validate_streamgraph_data <- function(data, time_var, value_var, group_var) {
  required <- c(time_var, value_var, group_var)
  not_found_msgs <- c(
    "Time variable not found in data",
    "Value variable not found in data",
    "Group variable not found in data"
  )

  # Check if variables exist
  absent <- !(required %in% names(data))
  if (any(absent)) {
    return(not_found_msgs[absent])
  }

  errors <- character(0)

  # Check data types
  if (!is.numeric(data[[time_var]])) {
    errors <- c(errors, "Time variable must be numeric")
  }
  if (!is.numeric(data[[value_var]])) {
    errors <- c(errors, "Value variable must be numeric")
  }

  # Check for sufficient data
  complete <- complete.cases(data[required])
  if (sum(complete) < 3) {
    errors <- c(errors, "Need at least 3 complete data rows")
  }

  # Check for multiple groups. Only complete rows are counted, so NA and
  # groups that appear solely in incomplete rows are not treated as groups.
  usable_groups <- unique(data[[group_var]][complete])
  if (length(usable_groups) < 2) {
    errors <- c(errors, "Need at least 2 groups for stream graph")
  }

  if (length(errors) == 0) {
    "Data validation passed!"
  } else {
    errors
  }
}
# Test validation
validate_streamgraph_data(sales_data, "month", "sales", "product")
Summary
The jjstreamgraph function provides a comprehensive
solution for time series visualization with:
- Interactive stream graphs with multiple layout and styling options
- Performance optimizations for efficient rendering
- Robust error handling and data validation
- Flexible customization for various use cases
- Clinical research applications for epidemiological and patient data
This makes it an excellent choice for exploring temporal patterns in multi-category data across research, clinical, and business applications.
Function Reference
For complete parameter documentation, see the streamgraph package documentation:
- CRAN streamgraph documentation
# Session information
sessionInfo()