# install.packages(c("tidyverse", "httr", "XML"))
library(tidyverse)
# `tidyverse` for data cleaning, usability, and output
library(httr)
library(XML)
# `httr` and `XML` for data import
This is all drawn from the Wikipedia article on the Auckland Volcanic Field. I’m using this as practice in extracting data from the web and making it pretty and presentable, all while having a semi-practical use: importing specific geospatial points for all of the volcanoes. (Anti-skill loss project my beloved <3).
# Import data from Wikipedia
# From user:schnee response on https://stackoverflow.com/questions/7407735/importing-wikipedia-tables-in-r
# This is a permanent link to the exact data I used (accessed on 2023-07-17).
url <- "https://en.m.wikipedia.org/w/index.php?title=Auckland_volcanic_field&oldid=1146734511"
# To scrape the live article instead, use the following at your own risk:
# url <- "https://en.m.wikipedia.org/wiki/Auckland_volcanic_field"
r <- GET(url)
# Stop with an informative error on an HTTP failure instead of handing an
# error page to the table parser.
stop_for_status(r)
# Parse the HTML tables in the response body; `doc` is indexed by table
# position further down (`doc[[2]]`).
doc <- readHTMLTable(doc = content(r, "text"), header = TRUE)
Now that we have imported the data, let’s clean it up a little bit.
# Build the tidy `volcanoes` table from the second parsed HTML table:
# rename the scraped headers, drop the reference/image columns, and split the
# combined Height, Location and Age cells into separate columns.
volcanoes <-
  tibble(doc[[2]]) %>%
  # Cleaning headers -----------------------------------------------------------
  rename(
    Volcanoes = `Volcanoes\n`,
    Age = `Age (thousand years)[38]`,
    Height = `Height\n`,
    Location = `Location (Coordinates)\n`,
    Refs = `Refs\n`,
    Images = `Images\n`
  ) %>%
  # Feel free to keep the Refs & Images, I don't find them useful --------------
  select(!c(Refs, Images)) %>%
  # Cleaning Height data -------------------------------------------------------
  # The default separator splits on every run of non-alphanumeric characters;
  # pieces mapped to NA in `into` are discarded. Rows with fewer pieces are
  # padded with NA on the right (`fill = "right"` is what the default does
  # anyway, minus the warning).
  separate(
    col = Height,
    into = c("Height (m)", NA, "Height (ft)", NA, "Quarried"),
    fill = "right"
  ) %>%
  mutate(
    `Height (m)` = as.integer(`Height (m)`),
    `Height (ft)` = as.integer(`Height (ft)`),
    # TRUE only where the trailing piece is exactly "quarried"; NA and any
    # other text become FALSE.
    Quarried = Quarried %in% "quarried"
  ) %>%
  # Cleaning coordinates, only need one of these after all ---------------------
  # Keep only the third "/"-separated piece of the coordinate cell.
  separate(col = Location, into = c(NA, NA, "Location"), sep = "/") %>%
  # Cleaning age ---------------------------------------------------------------
  # Ages without a "±" part get NA uncertainty.
  separate(
    col = Age,
    into = c("Age", "Uncertainty (±)"),
    sep = "±",
    fill = "right"
  )
# Hand-patch the age cells that the automatic split could not handle. Values
# are still text here, in thousands of years. Going by the rendered table,
# the rows are: 3 Boggust Park Crater, 27 Pukaki Lagoon, 31 Pukewairiki,
# 33 Rangitoto Island, 47 Te Pou Hawaiki, 53 Wiri Mountain / Matukutūruru.
volcanoes$Age[c(3, 27, 31, 33, 47, 53)] <-
  c("130", "45", "130", "0.62", "28.0", "30.55")
# Row 53 also needs its uncertainty filled in by hand.
volcanoes[["Uncertainty (±)"]][53] <- "0.45"
# Yes, this is horribly messy and gross. How'd you know?
# Convert Age and its uncertainty from text (thousands of years) to whole
# years. `as.integer()` truncates towards zero, so the scaled value is rounded
# first: a floating-point product that lands a hair below the intended whole
# number would otherwise lose a year.
volcanoes <- volcanoes %>%
  mutate(
    across(
      c(Age, `Uncertainty (±)`),
      \(x) as.integer(round(as.numeric(x) * 1000))
    )
  )
# One name does not come through the scrape cleanly, so row 52 is set by hand
volcanoes$Volcanoes[52] <- "Whakamuhu / Saint Heliers / Glover Park"
# Save the cleaned table so later sections can reload it ----------------------
volcanoes %>%
  write_csv("volcanoes.csv")
# Render the full dataset as a table ===========================================
volcanoes %>%
  knitr::kable()
| Volcanoes | Age | Uncertainty (±) | Height (m) | Height (ft) | Quarried | Location |
|---|---|---|---|---|---|---|
| Albert Park Volcano | 145000 | 4000 | NA | NA | FALSE | -36.8486; 174.7673 |
| Ash Hill | 31800 | 400 | 30 | 98 | FALSE | -37.002754; 174.867545 |
| Boggust Park Crater | 130000 | NA | 14 | 46 | FALSE | -36.955413; 174.813552 |
| Cemetery Crater | NA | NA | 33 | 108 | FALSE | -36.989828; 174.841082 |
| Crater Hill | 30400 | 800 | NA | NA | FALSE | -36.986546; 174.827135 |
| Grafton Volcano | 106500 | NA | 82 | 269 | FALSE | -36.858440; 174.763624 |
| Hampton Park | 57000 | 32000 | 43 | 141 | FALSE | -36.950925; 174.89544 |
| Kohuora | 33700 | 2400 | 37 | 121 | FALSE | -36.97873; 174.842691 |
| Māngere Lagoon | 59500 | NA | 20 | 66 | FALSE | -36.95702; 174.77763 |
| Matanginui / Green Mount | 19600 | 6600 | 78 | 256 | FALSE | -36.939911; 174.898267 |
| Matukutureia / McLaughlins Mountain | 48200 | 6400 | 73 | 240 | FALSE | -37.013511; 174.845974 |
| Maungakiekie / One Tree Hill | 67000 | 12000 | 182 | 597 | FALSE | -36.90000; 174.78306 |
| Maungarahiri / Little Rangitoto | 24600 | 600 | 75 | 246 | FALSE | -36.875407; 174.809636 |
| Maungarei / Mount Wellington | 10000 | 1000 | 135 | 443 | FALSE | -36.89306; 174.846556 |
| Maungataketake / Elletts Mountain | 88900 | 4800 | 76 | 249 | FALSE | -36.994635; 174.747548 |
| Maungauika / North Head | 87500 | 15200 | 50 | 160 | FALSE | -36.827751; 174.81205 |
| Maungawhau / Mount Eden | 28000 | 600 | 196 | 643 | FALSE | -36.877; 174.764 |
| Motukorea / Browns Island | 24400 | 600 | 68 | 223 | FALSE | -36.8306; 174.8948 |
| Mount Robertson / Sturges Park | 24300 | 800 | 78 | 256 | FALSE | -36.948477; 174.841726 |
| Ōhinerau / Mount Hobson | 34200 | 1800 | 143 | 469 | FALSE | -36.877814; 174.786156 |
| Ohuiarangi / Pigeon Mountain | 23400 | 800 | 55 | 180 | FALSE | -36.888846; 174.903116 |
| Ōrākei Basin | 126000 | 6000 | 54 | 177 | FALSE | -36.867124; 174.81308 |
| Ōtāhuhu / Mount Richmond | 30200 | 4200 | 50 | 160 | FALSE | -36.932562; 174.839451 |
| Ōtuataua | 24200 | 1800 | 64 | 210 | FALSE | -36.98611; 174.75417 |
| Ōwairaka / Te Ahi-kā-a-Rakataura / Mount Albert | 119200 | 5600 | 135 | 443 | FALSE | -36.890475; 174.720097 |
| Puhinui Craters | NA | NA | 24 | 79 | FALSE | -37.01465; 174.83296 |
| Pukaki Lagoon | 45000 | NA | 37 | 121 | FALSE | -36.982998; 174.810226 |
| Pukeiti | 23700 | NA | 30 | 98 | FALSE | -36.983756; 174.757183 |
| Pukekawa / Auckland Domain | 106000 | 8000 | 77 | 253 | FALSE | -36.859158; 174.775808 |
| Pukewīwī / Puketāpapa / Mount Roskill | 105300 | 6200 | 110 | 360 | FALSE | -36.912286; 174.737371 |
| Pukewairiki | 130000 | NA | 35 | 115 | FALSE | -36.944078; 174.865887 |
| Pupuke | 193200 | 5600 | 34 | 112 | FALSE | -36.780115; 174.766184 |
| Rangitoto Island | 620 | NA | 260 | 850 | FALSE | -36.786742; 174.860115 |
| Rarotonga / Mount Smart | 20100 | 200 | 87 | 285 | TRUE | -36.91833; 174.81250 |
| Styaks Swamp | 19100 | NA | 16 | 52 | FALSE | -36.936138; 174.900155 |
| Takaroro / Mount Cambria | 42300 | 22000 | 30 | 98 | TRUE | -36.824444; 174.801933 |
| Takarunga / Mount Victoria | 34800 | 4000 | 87 | 285 | FALSE | -36.8266; 174.7990 |
| Taurere / Taylors Hill | 30200 | 200 | 56 | 184 | FALSE | -36.864223; 174.869943 |
| Te Apunga-o-Tainui / McLennan Hills | 41300 | 2400 | 45 | 148 | TRUE | -36.929208; 174.846468 |
| Te Hopua-a-Rangi / Gloucester Park | 31000 | NA | 12 | 39 | FALSE | -36.9295; 174.784734 |
| Te Kopua Kai-a-Hiku / Panmure Basin | 25200 | 1800 | 35 | 115 | FALSE | -36.90495; 174.849343 |
| Te Kopua-o-Matakamokamo / Tank Farm / Tuff Crater | 181000 | 2000 | 46 | 151 | FALSE | -36.8020; 174.7533 |
| Onepoto | 187600 | NA | 46 | 151 | FALSE | -36.80818; 174.75085 |
| Te Kōpuke / Tītīkōpuke / Mount St John | 75300 | 3400 | 126 | 413 | FALSE | -36.883431; 174.780196 |
| Te Motu-a-Hiaroa / Puketutu | 29800 | 4400 | 65 | 213 | FALSE | -36.965186; 174.747248 |
| Te Pane-o-Mataaho / Māngere Mountain | 59000 | 20000 | 106 | 348 | FALSE | -36.9496; 174.7831 |
| Te Pou Hawaiki | 28000 | NA | 95 | 312 | TRUE | -36.88247; 174.766726 |
| Te Puke ō Tara / Otara Hill | 56500 | NA | 89 | 292 | TRUE | -36.947105; 174.898363 |
| Te Tātua-a-Riukiuta / Three Kings | 31000 | 1800 | 133 | 436 | FALSE | -36.902926; 174.754651 |
| Te Tauoma / Purchas Hill | 10900 | 200 | 50 | 160 | TRUE | -36.887138; 174.847476 |
| Waitomokia / Mt Gabriel | 20300 | 200 | 22 | 72 | TRUE | -36.976981; 174.770336 |
| Whakamuhu / Saint Heliers / Glover Park | 161000 | 36000 | 65 | 213 | FALSE | -36.846911; 174.867662 |
| Wiri Mountain / Matukutūruru | 30550 | 450 | 80 | 260 | TRUE | -37.007334; 174.858441 |
Well, isn’t that the question. For me, I created this because I plan on visiting all 53(!) historic volcanoes in Auckland. It seems fun and it seems like a great way to explore the city. I went through the hassle of manually making this into a Google Maps list, but it’s deeply inaccurate (I attached each point to a nearby place rather than its coordinates), so I used the code below (in the “Volcanoes to visit” generator) to add the points to a Google My Maps map instead. However, the points within the Wikipedia table seem a little inaccurate… I will report back from my adventures with any important changes.
Feel free to play with this, if you want a neat small dataset or some practice in R by trying to reproduce this.
Some fun stats (now easy to do with the data already in R):
| Number of volcanoes | Total Height (m) | Total Height (ft) | Mean Age (thousand years) | Number of Quarried sites |
|---|---|---|---|---|
| 53 | 3669 | 12016 | 60 | 8 |
Anyways. Have fun, enjoy the data! I might post a note here about any future updates, or perhaps a little table that is updated with info on the sites I have visited so far.
Version 1.1: Added volcano tracker.
# A previous Alice did not have a better way of recording which volcanoes she had visited yet
volcanoes <- read_csv("volcanoes.csv")
# One row per visited volcano. Name and date sit side by side so a new visit is
# a single added line and the two can never drift out of step. Names must match
# the `Volcanoes` column exactly, because they are the join key further down.
visited <- tribble(
  ~Volcanoes, ~`Date visited`,
  "Maungauika / North Head", "2023-07-11",
  "Takarunga / Mount Victoria", "2023-07-11",
  "Albert Park Volcano", "2023-07-17",
  "Maungakiekie / One Tree Hill", "2023-07-22",
  "Te Kōpuke / Tītīkōpuke / Mount St John", "2023-07-22",
  "Ōhinerau / Mount Hobson", "2023-07-22",
  "Ohuiarangi / Pigeon Mountain", "2023-07-30",
  "Te Kopua-o-Matakamokamo / Tank Farm / Tuff Crater", "2023-08-03",
  "Onepoto", "2023-08-03",
  "Maungarei / Mount Wellington", "2023-08-27",
  "Taurere / Taylors Hill", "2023-08-27",
  "Whakamuhu / Saint Heliers / Glover Park", "2023-08-27",
  "Maungawhau / Mount Eden", "2023-09-01",
  "Takaroro / Mount Cambria", "2023-09-17",
  "Te Pou Hawaiki", "2023-09-27",
  "Te Tātua-a-Riukiuta / Three Kings", "2023-10-13",
  "Te Hopua-a-Rangi / Gloucester Park", "2023-10-14",
  "Te Pane-o-Mataaho / Māngere Mountain", "2023-10-14",
  "Māngere Lagoon", "2023-10-14",
  "Ōrākei Basin", "2023-11-04",
  "Maungarahiri / Little Rangitoto", "2023-11-04",
  "Pukekawa / Auckland Domain", "2023-10-26",
  "Grafton Volcano", "2023-08-05",
  "Ash Hill", "2023-11-06",
  "Wiri Mountain / Matukutūruru", "2023-11-06",
  "Matukutureia / McLaughlins Mountain", "2023-11-06",
  "Puhinui Craters", "2023-11-06",
  "Pukewīwī / Puketāpapa / Mount Roskill", "2023-11-13",
  "Ōwairaka / Te Ahi-kā-a-Rakataura / Mount Albert", "2023-11-13"
) %>%
  # Redundant column for future filters or whatever idk; placed second so the
  # column order stays Volcanoes, Visited, Date visited.
  mutate(Visited = TRUE, .after = Volcanoes)
# Keep only the volcanoes that appear in `visited`. The join is done once,
# with the key spelled out, and reused for both the detail rows and the
# summary row.
visited_volcanoes <- inner_join(volcanoes, visited, by = "Volcanoes")

# Detail rows: one per visited volcano, with the visit date as a real Date so
# the final table sorts chronologically.
bigvisited <- visited_volcanoes %>%
  select(Volcanoes, `Date visited`, Age, `Height (ft)`, Location) %>%
  mutate(
    `Date visited` = as.Date(`Date visited`)
  )

# Summary row: reuses the detail column names so it can be stacked underneath.
# Location carries the visited count and percentage, Height (ft) the total,
# Age the mean.
summaryvisited <- visited_volcanoes %>%
  summarize(
    Location = paste0(
      "n = ", n(), " / ", nrow(volcanoes),
      " (", round((n() / nrow(volcanoes)) * 100), "%)"
    ),
    `Height (ft)` = sum(`Height (ft)`, na.rm = TRUE),
    Age = as.integer(mean(Age, na.rm = TRUE)),
    Volcanoes = "Mean Age / Total Height"
  )

# The summary row has no visit date (NA), so arrange() places it last.
knitr::kable(
  bind_rows(bigvisited, summaryvisited) %>%
    arrange(`Date visited`, Volcanoes)
)
| Volcanoes | Date visited | Age | Height (ft) | Location |
|---|---|---|---|---|
| Maungauika / North Head | 2023-07-11 | 87500 | 160 | -36.827751; 174.81205 |
| Takarunga / Mount Victoria | 2023-07-11 | 34800 | 285 | -36.8266; 174.7990 |
| Albert Park Volcano | 2023-07-17 | 145000 | NA | -36.8486; 174.7673 |
| Maungakiekie / One Tree Hill | 2023-07-22 | 67000 | 597 | -36.90000; 174.78306 |
| Te Kōpuke / Tītīkōpuke / Mount St John | 2023-07-22 | 75300 | 413 | -36.883431; 174.780196 |
| Ōhinerau / Mount Hobson | 2023-07-22 | 34200 | 469 | -36.877814; 174.786156 |
| Ohuiarangi / Pigeon Mountain | 2023-07-30 | 23400 | 180 | -36.888846; 174.903116 |
| Onepoto | 2023-08-03 | 187600 | 151 | -36.80818; 174.75085 |
| Te Kopua-o-Matakamokamo / Tank Farm / Tuff Crater | 2023-08-03 | 181000 | 151 | -36.8020; 174.7533 |
| Grafton Volcano | 2023-08-05 | 106500 | 269 | -36.858440; 174.763624 |
| Maungarei / Mount Wellington | 2023-08-27 | 10000 | 443 | -36.89306; 174.846556 |
| Taurere / Taylors Hill | 2023-08-27 | 30200 | 184 | -36.864223; 174.869943 |
| Whakamuhu / Saint Heliers / Glover Park | 2023-08-27 | 161000 | 213 | -36.846911; 174.867662 |
| Maungawhau / Mount Eden | 2023-09-01 | 28000 | 643 | -36.877; 174.764 |
| Takaroro / Mount Cambria | 2023-09-17 | 42300 | 98 | -36.824444; 174.801933 |
| Te Pou Hawaiki | 2023-09-27 | 28000 | 312 | -36.88247; 174.766726 |
| Te Tātua-a-Riukiuta / Three Kings | 2023-10-13 | 31000 | 436 | -36.902926; 174.754651 |
| Māngere Lagoon | 2023-10-14 | 59500 | 66 | -36.95702; 174.77763 |
| Te Hopua-a-Rangi / Gloucester Park | 2023-10-14 | 31000 | 39 | -36.9295; 174.784734 |
| Te Pane-o-Mataaho / Māngere Mountain | 2023-10-14 | 59000 | 348 | -36.9496; 174.7831 |
| Pukekawa / Auckland Domain | 2023-10-26 | 106000 | 253 | -36.859158; 174.775808 |
| Maungarahiri / Little Rangitoto | 2023-11-04 | 24600 | 246 | -36.875407; 174.809636 |
| Ōrākei Basin | 2023-11-04 | 126000 | 177 | -36.867124; 174.81308 |
| Ash Hill | 2023-11-06 | 31800 | 98 | -37.002754; 174.867545 |
| Matukutureia / McLaughlins Mountain | 2023-11-06 | 48200 | 240 | -37.013511; 174.845974 |
| Puhinui Craters | 2023-11-06 | NA | 79 | -37.01465; 174.83296 |
| Wiri Mountain / Matukutūruru | 2023-11-06 | 30550 | 260 | -37.007334; 174.858441 |
| Pukewīwī / Puketāpapa / Mount Roskill | 2023-11-13 | 105300 | 360 | -36.912286; 174.737371 |
| Ōwairaka / Te Ahi-kā-a-Rakataura / Mount Albert | 2023-11-13 | 119200 | 443 | -36.890475; 174.720097 |
| Mean Age / Total Height | NA | 71926 | 7613 | n = 29 / 53 (55%) |
Tracker last updated 2023-11-19.
Version 1.3: Added a “to visit” CSV generator for maps.
You know how I mentioned wanting to have a nice way to export the location of the volcanoes I still had to visit so I’d be able to figure out where to go next a bit easier? Now is the time!
# Split the "lat; long" text in Location into two columns. The piece after the
# ";" starts with a space, so both pieces are trimmed before being written out.
volcanoes <- volcanoes %>%
  mutate(
    Lat = str_trim(str_split_i(Location, ";", 1)),
    Long = str_trim(str_split_i(Location, ";", 2))
  )

# Attach the visit info to every volcano once; volcanoes without a match in
# `visited` get NA for Visited and Date visited.
visit_status <- left_join(volcanoes, visited, by = "Volcanoes")

# Only the volcanoes still left to visit
visit_status %>%
  filter(is.na(Visited)) %>%
  write_csv("tovisit.csv")

# Every volcano, visited or not (despite the file name)
write_csv(visit_status, "visited.csv")
And, now that we have that with Lat and Long specified in columns, we can import it into our mapping software of choice. A neat thing I found is that you can re-import specific layers, which is nice once a couple of volcanoes are ticked off the list.
Version 1.4 added a leaflet map of the volcanoes to the
site.
library(sf)
library(leaflet)
# Reload the joined export. Visited is NA for every volcano that had no match
# in the tracker, so NA is turned into FALSE.
volcanoes <- read_csv("visited.csv") %>%
  mutate(
    Visited = replace_na(Visited, FALSE)
  )

# Build the point geometry straight from the Long/Lat columns (x = Long,
# y = Lat, CRS 4326). `remove = FALSE` keeps Long and Lat as ordinary columns,
# because the map below still refers to them by name.
geovolcanoes <- st_as_sf(
  volcanoes,
  coords = c("Long", "Lat"),
  crs = 4326,
  remove = FALSE
)
# One marker icon per volcano: dark blue if visited, light gray otherwise.
# `ifelse()` is vectorised over the whole Visited column, so no per-element
# apply is needed.
icons <- awesomeIcons(
  library = "ion",
  icon = "",
  markerColor = ifelse(volcanoes$Visited, "darkblue", "lightgray")
)
# Basemap tiles and the credit line shown in the map corner
tile_url <- "https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_all/{z}/{x}/{y}.png"
tile_credit <- "Map tiles by Carto, under CC BY 3.0. Data by OpenStreetMap, under ODbL."

# Build the map layer by layer: basemap, one labelled marker per volcano,
# then a legend explaining the two marker colours.
volcano_map <- leaflet(data = geovolcanoes)
volcano_map <- addTiles(
  volcano_map,
  urlTemplate = tile_url,
  attribution = tile_credit
)
volcano_map <- addAwesomeMarkers(
  volcano_map,
  ~Long, ~Lat,
  icon = icons,
  label = ~Volcanoes
)
volcano_map <- addLegend(
  volcano_map,
  position = "bottomright",
  colors = c("darkblue", "lightgray"),
  labels = c("Visited", "Unvisited")
)
# Evaluate the widget at top level so it is rendered
volcano_map
The source code for this project is available at https://github.com/legallyahc/volcanic, with all work
done right in this RMarkdown file (dataset.rmd)!
This project is based on information from Wikipedia which was authored by Wikipedia contributors and is licensed under CC-BY-SA 4.0. This project is licensed under the GNU General Public License v3 or later, a compatible license with CC-BY-SA 4.0.
Originally published on 2023-07-16. Version 1.4.0.