library(dplyr) # For data manipulation
library(haven) # For importing SPSS data
library(labelled) # For working with variable/value labels
# Import the survey data
# read_sav() preserves SPSS variable/value labels as 'labelled' columns
npors <- read_sav("NPORS_2025_for_public_release_FINAL.sav")
# Reformat numeric variables with value codes
# (convert every labelled column to a factor so value labels become levels)
npors <- npors |>
mutate(across(where(is.labelled), to_factor))In the current era of low response rates and ubiquitous non-probability surveys, raking and similar techniques are essential to getting reasonable estimates from survey data. Typically when we rake survey data, we assume that we’re raking to population benchmark data: a ground truth known with certainty. But that’s often not the case. Instead, we often have to rake to data that were obtained from another survey. Sometimes it’s a big, expensive “gold standard” survey such as the American Community Survey (ACS) with tiny margins of error. Other times we have to rake to a small but high-quality survey. A prominent example is the National Public Opinion Reference Survey (NPORS) published by Pew Research. NPORS is meant to provide benchmarks that pollsters can use for raking, including variables that are absent from large government surveys, such as religious or political party affiliation. But NPORS is a survey of only a few thousand people with an appreciable margin of error for some estimates.
If we rake a poll’s data to NPORS, our poll’s margin of error would be larger than if we’d raked the poll’s data to genuine population data. If we want to report an honest margin of error, there’s got to be some way to take this into account. But many pollsters’ standard data analysis tools (SPSS, etc.) don’t have a way of knowing that the weights were based on raking to a survey rather than to population data.
Survey statisticians developed tools for estimating the margin of error (i.e., sampling variance) when raking to another survey, referring to these as “sample-based calibration” methods. Westat’s Jean Opsomer and Andreea Erciulescu wrote a nice paper describing these methods, which are implemented in the R package ‘svrep’. There are a few different approaches out there, but the simplest involve using computational tools like the bootstrap or jackknife.
I’ll walk through a simple example using the 2025 NPORS data.
Loading Example Data from NPORS
First, let’s load the data from an SPSS data file.
The dataset includes two variables that are important for calculating the margin of error: the variable STRATUM gives the sampling stratum, and BASEWT gives the sampling weights. Kudos to Pew for including those variables in the dataset so that users can analyze the data correctly.
Next, we describe the survey’s design features to R so that it can estimate sampling variances correctly. Note that we’re using the actual sampling weights, BASEWT, rather than the final weights (in the variable named WEIGHT) which Pew uses for its official estimates.
library(survey) # For survey data analysis
library(svrep) # For replicate weights and sample-based calibration
library(srvyr) # For data manipulation using 'dplyr'-style syntax
# Describe the NPORS design to R: single-stage (ids = 1), stratified by
# STRATUM, weighted by the base sampling weights BASEWT (not the final
# WEIGHT variable), so sampling variances are estimated correctly.
npors_svy <- as_survey_design(
.data = npors,
ids = 1,
strata = STRATUM,
weights = BASEWT
)Now we can produce weighted estimates from the survey, along with a margin of error. Here’s an estimate of political party affiliation.
# Weighted estimate of party affiliation with a standard error for each
# category; moe is the 95% margin of error (1.96 * SE).
npors_svy |>
group_by(PARTYSUM) |>
summarize(
pct = survey_mean(vartype = c("se"))
) |>
mutate(moe = pct_se * 1.96)# A tibble: 3 × 4
PARTYSUM pct pct_se moe
<fct> <dbl> <dbl> <dbl>
1 Rep/Lean Rep 0.468 0.00813 0.0159
2 Dem/Lean Dem 0.472 0.00813 0.0159
3 DK/Refused/No lean 0.0604 0.00375 0.00734
Creating an Example Poll Dataset
To illustrate how we would rake a poll’s data to the NPORS data, we need example poll data. For the purposes of this blog post, I’ll just create a fake polling dataset by subsampling records from the NPORS data.
Here’s our “poll” dataset.
# Simulate a "poll" by drawing a weighted resample of NPORS: within each
# stratum, draw the same number of rows with replacement, selecting rows
# with probability proportional to their base weights.
poll <- slice_sample(
  npors,
  prop = 1,
  replace = TRUE,
  by = STRATUM,
  weight_by = BASEWT
)
# Describe the poll's design the same way as the NPORS design above:
# no clustering (ids = 1), NPORS strata, and the base sampling weights.
# Use the full argument name `weights` (the original `weight =` relied on
# R's partial argument matching) for consistency with the npors_svy call.
poll_svy <- as_survey_design(
  .data = poll,
  ids = 1,
  strata = STRATUM,
  weights = BASEWT
)Creating Replicate Weights
From here on out, we’ll use the bootstrap for variance estimation. Replication methods are generally the easiest way to handle variance estimation when your data involves raking or other kinds of weight adjustments. The R code below creates 500 bootstrap replicates for each survey, and lets us use them for variance estimation.
# Convert each design to 500 bootstrap replicate weights. mse = TRUE
# computes variances around the full-sample point estimate (see
# ?svrep::as_bootstrap_design).
npors_boot_svy <- npors_svy |> as_bootstrap_design(replicates = 500, mse = TRUE)
poll_boot_svy <- poll_svy |> as_bootstrap_design(replicates = 500, mse = TRUE)Sample-based Calibration
Now that we have replicates, we can calibrate our poll’s data to the NPORS data. The function below from the ‘svrep’ package lets us calibrate on a list of variables, while ensuring that the sampling error in the control totals is accounted for.
# Rake the poll's replicate weights to the NPORS margins while propagating
# the sampling error of the NPORS control totals into the poll's variance
# estimates (the point of sample-based calibration).
raked_poll_svy <- calibrate_to_sample(
primary_rep_design = poll_boot_svy,
control_rep_design = npors_boot_svy,
cal_formula = ~ PARTYSUM + RELIGCAT1 + RACETHN,
calfun = survey::cal.raking
)The ‘naive’ Raking Approach Typically Used in Practice
For comparison, we’ll also create a dataset with calibrated weights where we ignore the fact that the control totals have sampling error. This is unfortunately what analysts typically do in practice.
# Create list of dataframes with control totals
# Estimate the weighted total (Freq) for each category of every raking
# variable from the NPORS design, returning a list named by variable.
control_totals <- setNames(
  lapply(
    c("PARTYSUM", "RELIGCAT1", "RACETHN"),
    function(var_name) {
      npors_svy |>
        group_by(across(all_of(var_name))) |>
        summarize(Freq = survey_total(vartype = NULL))
    }
  ),
  c("PARTYSUM", "RELIGCAT1", "RACETHN")
)
# Rake the weights, pretending the control totals are fixed population totals
# NOTE: survey::rake() matches population.margins to sample.margins by
# position, so the two lists must be in the same variable order.
naive_raked_poll_svy <- rake(
design = poll_boot_svy,
sample.margins = list(~ PARTYSUM, ~ RELIGCAT1, ~ RACETHN),
population.margins = control_totals
)Comparing Estimates from Different Calibration Approaches
With that done, let’s produce some estimates. First, let’s look at estimates for religious categories, starting with the estimates from the sample-based calibration. The standard errors are typically between half a percentage point and a percentage point.
# Religion estimates under sample-based calibration: the SEs reflect the
# sampling error in the NPORS control totals.
raked_poll_svy |>
group_by(RELIGCAT1) |>
summarize(PCT = survey_mean(),
n = unweighted(n()))# A tibble: 5 × 4
RELIGCAT1 PCT PCT_se n
<fct> <dbl> <dbl> <int>
1 Protestant 0.440 0.00754 2189
2 Catholic 0.191 0.00636 981
3 Unaffiliated 0.252 0.00616 1280
4 Other 0.0947 0.00504 457
5 Refused 0.0219 0.00251 115
Next we have the estimates with the “naive” raking approach. The estimated percentages are the same, but the standard errors are all essentially zero, which is misleading.
# Same religion estimates under naive raking: on the raking variables the
# SEs collapse to essentially zero, which is misleading.
naive_raked_poll_svy |>
group_by(RELIGCAT1) |>
summarize(PCT = survey_mean(),
n = unweighted(n()))# A tibble: 5 × 4
RELIGCAT1 PCT PCT_se n
<fct> <dbl> <dbl> <int>
1 Protestant 0.440 0.00000454 2189
2 Catholic 0.191 0.00000248 981
3 Unaffiliated 0.252 0.00000193 1280
4 Other 0.0946 0.00000154 457
5 Refused 0.0219 0.000000446 115
We know that the standard errors should be higher, because the estimates come from the NPORS data, which is a survey. Below, we can see that the standard errors from NPORS are precisely the standard errors we saw just now when looking at estimates based on the appropriate sample-based calibration method.
# Religion estimates computed directly from the NPORS bootstrap design,
# for comparison with the calibrated poll estimates above.
npors_boot_svy |>
group_by(RELIGCAT1) |>
summarize(PCT = survey_mean(),
n = unweighted(n()))# A tibble: 5 × 4
RELIGCAT1 PCT PCT_se n
<fct> <dbl> <dbl> <int>
1 Protestant 0.440 0.00754 2135
2 Catholic 0.191 0.00636 989
3 Unaffiliated 0.252 0.00616 1344
4 Other 0.0947 0.00504 452
5 Refused 0.0219 0.00251 102
The problem with the naive raking method is obvious when you’re looking at the raking variables, like we just did. But it also occurs when you look at other variables in the survey. For example, let’s look at responses to the following survey question: “If more Americans owned guns, do you think there would be: (1) More crime, (2) Less crime, or (3) No difference.” Responses to this question are obviously going to be associated with the calibration variables, especially political affiliation.
First we have the estimates produced using the appropriate sample-based calibration method.
# Gun-question estimates under sample-based calibration (a non-raking
# variable that is correlated with the calibration variables).
raked_poll_svy |>
group_by(MOREGUNIMPACT) |>
summarize(PCT = survey_mean(),
n = unweighted(n()))# A tibble: 4 × 4
MOREGUNIMPACT PCT PCT_se n
<fct> <dbl> <dbl> <int>
1 More crime 0.370 0.00763 1999
2 Less crime 0.264 0.00736 1271
3 No difference 0.355 0.00794 1707
4 Refused/Web blank 0.0109 0.00168 45
Next we have the estimates produced using the naive raking method. The standard errors are much smaller, which is misleading. Because the naive raking method pretended that we had actual population data on political party affiliation, the standard error estimates make it look as if our poll’s estimates are much more precise than they really are.
# Gun-question estimates under naive raking: the SEs are understated
# because the control totals were treated as error-free.
naive_raked_poll_svy |>
group_by(MOREGUNIMPACT) |>
summarize(PCT = survey_mean(),
n = unweighted(n()))# A tibble: 4 × 4
MOREGUNIMPACT PCT PCT_se n
<fct> <dbl> <dbl> <int>
1 More crime 0.370 0.00655 1999
2 Less crime 0.264 0.00639 1271
3 No difference 0.355 0.00794 1707
4 Refused/Web blank 0.0109 0.00167 45
Saving Our Output Dataset
If we want data users to get accurate standard errors with our correctly raked poll data, then we need to provide them with not just the raked weights, but the raked replicate weights. We can easily get a dataset with the raked bootstrap weights, as follows.
# Flatten the calibrated design into a plain data frame containing the
# final weight (WGT) plus one column per bootstrap replicate weight
# (BOOTWGT1, BOOTWGT2, ...), so downstream users can get correct SEs.
output_dataset <- raked_poll_svy |>
as_data_frame_with_weights(
full_wgt_name = "WGT",
rep_wgt_prefix = "BOOTWGT"
)Here’s a preview of the first few records and a handful of variables.
# Preview a few identifying columns plus the first three replicate weights
output_dataset |>
select(RESPID, MODE, WGT, BOOTWGT1, BOOTWGT2, BOOTWGT3) |>
head(5) RESPID MODE WGT BOOTWGT1 BOOTWGT2 BOOTWGT3
1 5379 Online 0.9881618 2.9593387 0.969815 0.000000
2 11337 Online 0.6514923 0.6441639 0.000000 2.441569
3 17468 Online 0.9932987 0.0000000 0.964324 1.943268
4 78 Online 0.8296248 0.0000000 0.000000 0.000000
5 10451 Paper 1.2276654 2.6717214 0.000000 0.000000
We can easily write this to a CSV or SPSS file so we can share this with data users.
library(haven) # already attached above; harmless to re-attach here
# Export the weighted dataset (including replicate weights) to SPSS format
output_dataset |>
  write_sav("weighted_poll_data.sav")