---
title: "How the noslang_street_names file was made."
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{How the noslang_street_names file was made.}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = FALSE,
  comment = "#>"
)
```

```{r setup}
library(conflicted)
suppressMessages(conflict_prefer("filter", "dplyr"))
library(DOPE)
library(xml2)    # read_html()
library(rvest)   # html_nodes(), html_text(), html_attr()
library(purrr)   # map_dfr()
suppressPackageStartupMessages(library(dplyr)) # %>%, bind_rows()
library(stringr) # str_detect, str_to_lower()
library(tidyr)   # pivot_longer()
library(tibble)  # tibble()
library(usethis) # use_data()
```

# Scrape No Slang Data

*There is an additional source where slang (synonyms) were scraped: https://www.noslang.com/drugs/dictionary*

![](../inst/extdata/noslang.png)

The methods by which these data were extracted are very similar to how data from the DEA were extracted, but we include them here as well:

```{r eval = FALSE}
# Scrape one page of the NoSlang drug dictionary.
#
# `page` is appended to the dictionary URL to select the page to download.
# Returns a tibble with one row per <abbr> element found inside a table:
# `street_name` holds the element's text and `description` holds its `title`
# attribute.
get_slang <- function(page) {
  # Download and parse the page once. Both columns are taken from the same
  # node set, so they always have the same length and stay aligned row by row.
  entries <- paste0("https://www.noslang.com/drugs/dictionary/", page) %>%
    read_html() %>%
    html_nodes("table abbr")

  tibble(
    street_name = html_text(entries),
    description = html_attr(entries, "title")
  )
}

# creates a vector of the '#' sign plus all lowercase letters of the alphabet
# NOTE(review): "#" starts a URL fragment, so this entry presumably requests
# the dictionary's landing page -- confirm it returns the intended terms.
pages <- c("#", letters)

# iterate the function over the vector of pages to get each slang term and
# its description
noslang_raw <- map_dfr(pages, get_slang)

use_data(noslang_raw, overwrite = TRUE)
```

# Finding Drug Names

In the No Slang dataset, `noslang_street_names` (which was scraped from the NoSlang website), there is a `description` variable which contains both drug names and other phrases, for things like amounts. For the `DOPE` package we extracted the drug names.
To do this, a dataset called `ns` was created that contains the unique words/phrases in `description`. The code below adds indicator variables beginning with "d_". That set of variables was created by first checking for drugs that were mentioned in the DEA files. The total number of known drugs in the `description` was tallied. The records that contained 0 drug names were manually checked and new "d_" variables were added if the description was for a drug. The new drugs mentioned by NoSlang.com are marked with "# NS". There are some misspellings and slang words in the `description` variable. They appear in a `|` separated list in the `str_detect()` patterns below.

```{r}
# add note to use singular instead of plural
# add note to use common abbreviations (lsd) vs long names

# This was used to shorten the list of phrases to check to find the drug names.
ns <- data.frame(description = unique(tolower(noslang_raw$description)))

# Use this instead to make analysis file
ns <- noslang_raw %>%
  mutate(description = tolower(description))

# One regular expression per drug. Each name becomes an indicator column that
# holds 1 when the pattern is found in `description` and 0 otherwise. Drugs
# that were added because of NoSlang.com are marked with "# NS".
drug_patterns <- c(
  d_2cb = "2cb|nexus", # NS
  d_alphaEt = "alpha-ethyltryptamine", # NS
  d_alprazolam = "xanax",
  d_amphetamine = "amphetamine|speed",
  d_amt = "alpha-methyltryptamine", # NS
  d_amobarbital = "amobarbital", # NS
  d_amylNitrite = "amyl nitrite", # NS
  d_barbiturates = "barbiturate",
  d_bathSalts = "bath salts",
  d_benzodiazepines = "benzodiazepine|benzodiazipines",
  d_clonazepam = "klonopin",
  d_cocaine = "cocaine|coke|coccaine",
  d_codeine = "codeine",
  d_crack = "crack",
  d_dextromethorphan = "dextromethorphan|coricidin|cortison", # NS
  d_diazepam = "valium", # NS
  d_dmt = "dimethyltryptamine", # NS
  d_fentanyl = "fentanyl",
  d_flakka = "flakka",
  d_gbl = "gbl", # NS
  d_ghb = "ghb|gamma hydroxybutyrate",
  d_heroin = "heroin|herion",
  d_hydrocodone = "hydrocodone|vicodin|lortab|loratab",
  d_hydromorphone = "hydromorphone|diluadid",
  d_inhalants = "inhalant",
  d_isobutylNitrite = "isobutyl nitrite", # NS
  d_ketamine = "ketamine",
  d_khat = "khat",
  d_kratom = "kratom",
  d_lsd = "lsd|lysergic acid diethylamide",
  d_marijuana = "marijuana|marijuna|cannabis|marajuana|weed|marijauna|maihuana|cannibus|hashish|hasish|blunt|tetrahydrocannabinol|joint|panama red",
  d_mdma = "mdma|ecstacy|ecxtasy|ecstasy",
  d_mescaline = "peyote|mescaline",
  d_methadone = "methadone",
  d_methamphetamine = "methamphetamine|crystal myth|crystal rock of meth|methamphetimine|crystal meth",
  d_methcathinone = "methcathinone", # NS
  d_methaqualone = "methaqualone", # NS
  d_methylphenidate = "ritalin",
  d_morphine = "morphine|morophine",
  d_mushrooms = "mushroom",
  d_nitrous = "nitrous oxide", # NS
  d_opium = "opium",
  d_oxycodone = "oxycodone|oxycontin|oxycotin",
  d_pcp = "pcp|phencyclidine", # capitalization needs to match dea_factsheets_plus
  d_psilocybin = "psilocybin",
  d_rohypnol = "rohypnol",
  d_salviaDivinorum = "salvia divinorum",
  d_spice = "spice",
  d_steroids = "steroids|steriods|steroid",
  d_u47700 = "u-47700"
)

# Named list with one numeric 0/1 vector per drug, in the order given above.
drug_flags <- map(
  drug_patterns,
  function(pattern) as.numeric(str_detect(ns$description, pattern))
)

# Append the indicator columns to `ns` and tally, per row, how many drugs were
# detected. The tally is a vectorised sum of the flag vectors, so no
# rowwise() grouping is created and nothing needs to be ungrouped afterwards.
# `known` is used to drop phrases that do not contain drug names.
checkForDrugs <- ns %>%
  bind_cols(as_tibble(drug_flags)) %>%
  mutate(known = Reduce("+", drug_flags))
# %>% # use this for development
#   select(description, known, everything())

# Keep only the rows that mention at least one known drug, then recode
# descriptions that are exactly a misspelling, brand name or slang term to the
# name used for that drug. Descriptions that are longer phrases are left as is.
ns_drugs <- checkForDrugs %>%
  filter(known > 0) %>%
  select(-known) %>%
  mutate(
    description = case_when(
      description == "nexus" ~ "2cb",
      description == "speed" ~ "amphetamine",
      # the target used to be spelled "benzodiazipine", which is itself a typo
      description == "benzodiazipines" ~ "benzodiazepine",
      description == "coke" ~ "cocaine",
      description == "coccaine" ~ "cocaine",
      description == "coricidin" ~ "dextromethorphan",
      description == "cortison" ~ "dextromethorphan",
      description == "gamma hydroxybutyrate" ~ "ghb",
      description == "vicodin" ~ "hydrocodone",
      description == "lortab" ~ "hydrocodone",
      description == "loratab" ~ "hydrocodone",
      description == "herion" ~ "heroin",
      description == "lysergic acid diethylamide" ~ "lsd",
      description == "marijuna" ~ "marijuana",
      description == "cannabis" ~ "marijuana",
      description == "marajuana" ~ "marijuana",
      description == "weed" ~ "marijuana",
      description == "marijauna" ~ "marijuana",
      description == "maihuana" ~ "marijuana",
      description == "cannibus" ~ "marijuana",
      description == "hashish" ~ "marijuana",
      description == "hasish" ~ "marijuana",
      description == "blunt" ~ "marijuana",
      description == "tetrahydrocannabinol" ~ "marijuana",
      description == "joint" ~ "marijuana",
      description == "panama red" ~ "marijuana",
      description == "ecstacy" ~ "mdma",
      description == "ecxtasy" ~ "mdma",
      description == "ecstasy" ~ "mdma",
      description == "peyote" ~ "mescaline", # need to fix in DEA
      description == "crystal myth" ~ "methamphetamine",
      description == "crystal rock of meth" ~ "methamphetamine",
      description == "crystal meth" ~ "methamphetamine",
      description == "methamphetimine" ~ "methamphetamine",
      description == "morophine" ~ "morphine",
      description == "oxycontin" ~ "oxycodone",
      description == "oxycotin" ~ "oxycodone",
      description == "phencyclidine" ~ "pcp",
      description == "steriods" ~ "steroid",
      description == "steroids" ~ "steroid",
      TRUE ~ description
    )
  )

# don't double count crack as both crack and cocaine (use crack)
# remove cocaine if "crack cocaine"
ns_drugs$d_cocaine[ns_drugs$d_crack > 0] <- 0

# don't double count meth as both meth and amphetamine (use meth)
# remove amphetamine if methamphetamine
ns_drugs$d_amphetamine[ns_drugs$d_methamphetamine > 0] <- 0

# don't triple count mdma as mdma, meth and amphetamine (use mdma)
# remove amphetamine and methamphetamine if methylenedioxymethamphetamine
ns_drugs$d_amphetamine[ns_drugs$d_mdma > 0] <- 0
ns_drugs$d_methamphetamine[ns_drugs$d_mdma > 0] <- 0

# Reshape to one row per (street name, drug) pair, keeping only the drugs that
# were flagged, and spell out the drug names that were abbreviated in the
# indicator column names.
noslang_street_names <- ns_drugs %>%
  pivot_longer(
    cols = starts_with("d_"),
    names_to = "drug",
    values_to = "values",
    names_prefix = "d_"
  ) %>%
  filter(values > 0) %>%
  select(-values) %>%
  mutate(
    drug = case_when(
      drug == "alphaEt" ~ "alpha-ethyltryptamine",
      drug == "amylNitrite" ~ "amyl nitrite",
      drug == "bathSalts" ~ "bath salts",
      drug == "isobutylNitrite" ~ "isobutyl nitrite",
      drug == "nitrous" ~ "nitrous oxide",
      drug == "salviaDivinorum" ~ "salvia divinorum",
      TRUE ~ drug
    )
  ) %>%
  # str_remove() deletes only the "(spanish)" marker itself, so also trim any
  # whitespace left at the ends of the name once the marker is gone.
  mutate(street_name = str_trim(str_remove(street_name, "\\(spanish\\)"))) %>%
  filter(!street_name %in% c("are you anywhere?"))

# fix: street names that still contain a "/"
noslang_street_names %>% filter(str_detect(street_name, "/"))

# Add talwin

usethis::use_data(noslang_street_names, overwrite = TRUE)
```