# install.packages(c("tidyverse", "httr", "XML"))
library(tidyverse)
# `tidyverse` for data cleaning, usability, and output
library(httr)
library(XML)
# `httr` and `XML` for data import
This whole thing is drawn from the Wikipedia article on the Auckland Volcanic Field. I’m using this as practice in extracting data from the web and making it pretty and presentable, all while serving the semi-practical purpose of importing the geospatial coordinates of all of the volcanoes. (Anti-skill loss project my beloved <3)
# Import data from Wikipedia
# From user:schnee response on https://stackoverflow.com/questions/7407735/importing-wikipedia-tables-in-r
url <- "https://en.m.wikipedia.org/w/index.php?title=Auckland_volcanic_field&oldid=1146734511"
# This is a permanent link to the exact data I used (accessed on 2023-07-17), use the following at your own risk:
# url <- "https://en.m.wikipedia.org/wiki/Auckland_volcanic_field"
r <- GET(url)
# Fail loudly on an HTTP error (4xx/5xx) instead of silently parsing an error page
stop_for_status(r)
# Parse the HTML tables on the page into a list of data frames, treating the
# first row of each table as its header. The response body is decoded
# explicitly as UTF-8 so httr does not have to guess the encoding.
doc <- readHTMLTable(
  doc = content(r, as = "text", encoding = "UTF-8"),
  header = TRUE
)
Now that we have imported the data, let’s clean it up a little bit.
# Build the `volcanoes` tibble from the second table parsed off the page
volcanoes <-
  tibble(doc[[2]]) %>%
  # Cleaning headers -----------------------------------------------------------
  rename(
    Volcanoes = `Volcanoes\n`,
    Age = `Age (thousand years)[38]`,
    Height = `Height\n`,
    Location = `Location (Coordinates)\n`,
    Refs = `Refs\n`,
    Images = `Images\n`
  ) %>%
  # Feel free to keep the Refs & Images, I don't find them useful --------------
  select(!c(Refs, Images)) %>%
  # Cleaning Height data -------------------------------------------------------
  # Split on runs of non-alphanumeric characters (the default separator); the
  # 2nd and 4th pieces are discarded via `NA`, the 5th piece is kept as the
  # quarried marker.
  separate(
    col = Height,
    into = c("Height (m)", NA, "Height (ft)", NA, "Quarried")
  ) %>%
  mutate(
    `Height (m)` = as.integer(`Height (m)`),
    `Height (ft)` = as.integer(`Height (ft)`),
    # TRUE only where the marker is exactly "quarried"; `%in%` never returns
    # NA, so missing or empty markers become FALSE in a single step
    Quarried = Quarried %in% "quarried"
  ) %>%
  # Cleaning coordinates, only need one of these after all ---------------------
  # Keep only the third "/"-separated piece of the coordinate string
  separate(col = Location, into = c(NA, NA, "Location"), sep = "/") %>%
  # Cleaning age ---------------------------------------------------------------
  # Split "value ± uncertainty" into two (still character) columns
  separate(col = Age, into = c("Age", "Uncertainty (±)"), sep = "±")
# Manually update the values bc it's simpler than finding a particular function to do it
# Columns are addressed by name rather than by position so the patches keep
# working if the column order ever changes. Row numbers are positions in the
# scraped table; the volcano names in the trailing comments are taken from the
# rendered table for the pinned page revision.
volcanoes[53, "Age"] <- "30.55"            # Wiri Mountain / Matukutūruru
volcanoes[53, "Uncertainty (±)"] <- "0.45" # Wiri Mountain / Matukutūruru
volcanoes[3, "Age"] <- "130"               # Boggust Park Crater
volcanoes[27, "Age"] <- "45"               # Pukaki Lagoon
volcanoes[31, "Age"] <- "130"              # Pukewairiki
volcanoes[33, "Age"] <- "0.62"             # Rangitoto Island
volcanoes[47, "Age"] <- "28.0"             # Te Pou Hawaiki
# Yes, this is horribly messy and gross. How'd you know? (but it works soooooo idc)
# Convert thousands of years (character) to whole years (integer). Round before
# `as.integer()`: it truncates toward zero, so a product that lands just below
# a whole number because of floating-point error would otherwise come out one
# year short.
volcanoes <- volcanoes %>%
  mutate(
    Age = as.integer(round(as.numeric(Age) * 1000)),
    `Uncertainty (±)` = as.integer(round(as.numeric(`Uncertainty (±)`) * 1000))
  )
# That one funny name that wouldn't parse perfectly
volcanoes[52, "Volcanoes"] <- "Whakamuhu / Saint Heliers / Glover Park"
# Export the cleaned data set to disk, for however you want it ------------------
readr::write_csv(volcanoes, "volcanoes.csv")
# Render the full data set as a table ==========================================
volcanoes %>%
  knitr::kable()
Volcanoes | Age | Uncertainty (±) | Height (m) | Height (ft) | Quarried | Location |
---|---|---|---|---|---|---|
Albert Park Volcano | 145000 | 4000 | NA | NA | FALSE | -36.8486; 174.7673 |
Ash Hill | 31800 | 400 | 30 | 98 | FALSE | -37.002754; 174.867545 |
Boggust Park Crater | 130000 | NA | 14 | 46 | FALSE | -36.955413; 174.813552 |
Cemetery Crater | NA | NA | 33 | 108 | FALSE | -36.989828; 174.841082 |
Crater Hill | 30400 | 800 | NA | NA | FALSE | -36.986546; 174.827135 |
Grafton Volcano | 106500 | NA | 82 | 269 | FALSE | -36.858440; 174.763624 |
Hampton Park | 57000 | 32000 | 43 | 141 | FALSE | -36.950925; 174.89544 |
Kohuora | 33700 | 2400 | 37 | 121 | FALSE | -36.97873; 174.842691 |
Māngere Lagoon | 59500 | NA | 20 | 66 | FALSE | -36.95702; 174.77763 |
Matanginui / Green Mount | 19600 | 6600 | 78 | 256 | FALSE | -36.939911; 174.898267 |
Matukutureia / McLaughlins Mountain | 48200 | 6400 | 73 | 240 | FALSE | -37.013511; 174.845974 |
Maungakiekie / One Tree Hill | 67000 | 12000 | 182 | 597 | FALSE | -36.90000; 174.78306 |
Maungarahiri / Little Rangitoto | 24600 | 600 | 75 | 246 | FALSE | -36.875407; 174.809636 |
Maungarei / Mount Wellington | 10000 | 1000 | 135 | 443 | FALSE | -36.89306; 174.846556 |
Maungataketake / Elletts Mountain | 88900 | 4800 | 76 | 249 | FALSE | -36.994635; 174.747548 |
Maungauika / North Head | 87500 | 15200 | 50 | 160 | FALSE | -36.827751; 174.81205 |
Maungawhau / Mount Eden | 28000 | 600 | 196 | 643 | FALSE | -36.877; 174.764 |
Motukorea / Browns Island | 24400 | 600 | 68 | 223 | FALSE | -36.8306; 174.8948 |
Mount Robertson / Sturges Park | 24300 | 800 | 78 | 256 | FALSE | -36.948477; 174.841726 |
Ōhinerau / Mount Hobson | 34200 | 1800 | 143 | 469 | FALSE | -36.877814; 174.786156 |
Ohuiarangi / Pigeon Mountain | 23400 | 800 | 55 | 180 | FALSE | -36.888846; 174.903116 |
Ōrākei Basin | 126000 | 6000 | 54 | 177 | FALSE | -36.867124; 174.81308 |
Ōtāhuhu / Mount Richmond | 30200 | 4200 | 50 | 160 | FALSE | -36.932562; 174.839451 |
Ōtuataua | 24200 | 1800 | 64 | 210 | FALSE | -36.98611; 174.75417 |
Ōwairaka / Te Ahi-kā-a-Rakataura / Mount Albert | 119200 | 5600 | 135 | 443 | FALSE | -36.890475; 174.720097 |
Puhinui Craters | NA | NA | 24 | 79 | FALSE | -37.01465; 174.83296 |
Pukaki Lagoon | 45000 | NA | 37 | 121 | FALSE | -36.982998; 174.810226 |
Pukeiti | 23700 | NA | 30 | 98 | FALSE | -36.983756; 174.757183 |
Pukekawa / Auckland Domain | 106000 | 8000 | 77 | 253 | FALSE | -36.859158; 174.775808 |
Pukewīwī / Puketāpapa / Mount Roskill | 105300 | 6200 | 110 | 360 | FALSE | -36.912286; 174.737371 |
Pukewairiki | 130000 | NA | 35 | 115 | FALSE | -36.944078; 174.865887 |
Pupuke | 193200 | 5600 | 34 | 112 | FALSE | -36.780115; 174.766184 |
Rangitoto Island | 620 | NA | 260 | 850 | FALSE | -36.786742; 174.860115 |
Rarotonga / Mount Smart | 20100 | 200 | 87 | 285 | TRUE | -36.91833; 174.81250 |
Styaks Swamp | 19100 | NA | 16 | 52 | FALSE | -36.936138; 174.900155 |
Takaroro / Mount Cambria | 42300 | 22000 | 30 | 98 | TRUE | -36.824444; 174.801933 |
Takarunga / Mount Victoria | 34800 | 4000 | 87 | 285 | FALSE | -36.8266; 174.7990 |
Taurere / Taylors Hill | 30200 | 200 | 56 | 184 | FALSE | -36.864223; 174.869943 |
Te Apunga-o-Tainui / McLennan Hills | 41300 | 2400 | 45 | 148 | TRUE | -36.929208; 174.846468 |
Te Hopua-a-Rangi / Gloucester Park | 31000 | NA | 12 | 39 | FALSE | -36.9295; 174.784734 |
Te Kopua Kai-a-Hiku / Panmure Basin | 25200 | 1800 | 35 | 115 | FALSE | -36.90495; 174.849343 |
Te Kopua-o-Matakamokamo / Tank Farm / Tuff Crater | 181000 | 2000 | 46 | 151 | FALSE | -36.8020; 174.7533 |
Onepoto | 187600 | NA | 46 | 151 | FALSE | -36.80818; 174.75085 |
Te Kōpuke / Tītīkōpuke / Mount St John | 75300 | 3400 | 126 | 413 | FALSE | -36.883431; 174.780196 |
Te Motu-a-Hiaroa / Puketutu | 29800 | 4400 | 65 | 213 | FALSE | -36.965186; 174.747248 |
Te Pane-o-Mataaho / Māngere Mountain | 59000 | 20000 | 106 | 348 | FALSE | -36.9496; 174.7831 |
Te Pou Hawaiki | 28000 | NA | 95 | 312 | TRUE | -36.88247; 174.766726 |
Te Puke ō Tara / Otara Hill | 56500 | NA | 89 | 292 | TRUE | -36.947105; 174.898363 |
Te Tātua-a-Riukiuta / Three Kings | 31000 | 1800 | 133 | 436 | FALSE | -36.902926; 174.754651 |
Te Tauoma / Purchas Hill | 10900 | 200 | 50 | 160 | TRUE | -36.887138; 174.847476 |
Waitomokia / Mt Gabriel | 20300 | 200 | 22 | 72 | TRUE | -36.976981; 174.770336 |
Whakamuhu / Saint Heliers / Glover Park | 161000 | 36000 | 65 | 213 | FALSE | -36.846911; 174.867662 |
Wiri Mountain / Matukutūruru | 30550 | 450 | 80 | 260 | TRUE | -37.007334; 174.858441 |
Well, ain’t that the question. For me, I created this because I plan on visiting all 53(!) historic volcanoes in Auckland. Seems fun, and I don’t have anything better to do. I went through the hassle of making this into a Google Maps list, but it’s deeply inaccurate (I attached each point to a nearby place rather than its coordinates), so I used the code below (in the “Volcanoes to visit generator” section) to add the points to a Google My Maps map instead. However, the points seem a little inaccurate… I will report back from my adventures with any important changes.
Feel free to play with this, if you want a neat little dataset or some practice in R by trying to reproduce this.
Some fun stats (now easy to do with the data already in R):
Number of volcanoes | Total Height (m) | Total Height (ft) | Mean Age (thousand years) | Number of Quarried sites |
---|---|---|---|---|
53 | 3669 | 12016 | 60 | 8 |
Anyways. Have fun, enjoy the data! I might leave an update on any eventual updates, or perhaps a little table that is updated with info on the sites I have visited so far.
Peace and love <3 - alice
Version 1.1: added volcano tracker.
# Yes I tried to make a Shiny app. Yes I realized about an hour into it that it would not be static.
# Yes this hurts me.
# Reload the cleaned data set from the CSV exported earlier
volcanoes <- read_csv("volcanoes.csv")
# One row per visited volcano. Each name sits next to its visit date so the two
# cannot drift out of alignment; names must match the `Volcanoes` column of the
# data set exactly, because they are used as the join key.
visited <- tribble(
  ~Volcanoes, ~`Date visited`,
  "Maungauika / North Head", "2023-07-11",
  "Takarunga / Mount Victoria", "2023-07-11",
  "Albert Park Volcano", "2023-07-17",
  "Maungakiekie / One Tree Hill", "2023-07-22",
  "Te Kōpuke / Tītīkōpuke / Mount St John", "2023-07-22",
  "Ōhinerau / Mount Hobson", "2023-07-22",
  "Ohuiarangi / Pigeon Mountain", "2023-07-30",
  "Te Kopua-o-Matakamokamo / Tank Farm / Tuff Crater", "2023-08-03",
  "Onepoto", "2023-08-03",
  "Maungarei / Mount Wellington", "2023-08-27",
  "Taurere / Taylors Hill", "2023-08-27",
  "Whakamuhu / Saint Heliers / Glover Park", "2023-08-27",
  "Maungawhau / Mount Eden", "2023-09-01",
  "Takaroro / Mount Cambria", "2023-09-17",
  "Te Pou Hawaiki", "2023-09-27",
  "Te Tātua-a-Riukiuta / Three Kings", "2023-10-13",
  "Te Hopua-a-Rangi / Gloucester Park", "2023-10-14",
  "Te Pane-o-Mataaho / Māngere Mountain", "2023-10-14",
  "Māngere Lagoon", "2023-10-14",
  "Ōrākei Basin", "2023-11-04",
  "Maungarahiri / Little Rangitoto", "2023-11-04",
  "Pukekawa / Auckland Domain", "2023-10-26",
  "Grafton Volcano", "2023-08-05",
  "Ash Hill", "2023-11-06",
  "Wiri Mountain / Matukutūruru", "2023-11-06",
  "Matukutureia / McLaughlins Mountain", "2023-11-06",
  "Puhinui Craters", "2023-11-06",
  "Pukewīwī / Puketāpapa / Mount Roskill", "2023-11-13",
  "Ōwairaka / Te Ahi-kā-a-Rakataura / Mount Albert", "2023-11-13"
) %>%
  # Redundant column for future filters or whatever idk; placed right after the
  # name so the column order stays Volcanoes, Visited, Date visited
  mutate(Visited = TRUE, .after = Volcanoes)
# Join the data set to the visit log once and reuse the result below. The key
# is spelled out so the join does not depend on whichever column names the two
# tables happen to share.
visited_volcanoes <- inner_join(volcanoes, visited, by = "Volcanoes")
# Per-volcano rows for the tracker table, with the visit date as a real Date
bigvisited <- visited_volcanoes %>%
  select(Volcanoes, `Date visited`, Age, `Height (ft)`, Location) %>%
  mutate(
    `Date visited` = as.Date(`Date visited`)
  )
# Single summary row that reuses the table's columns: visit count and share in
# `Location`, total height in `Height (ft)`, mean age (truncated to an integer)
# in `Age`, and a label in `Volcanoes`
summaryvisited <- visited_volcanoes %>%
  summarize(
    Location = paste0("n = ", n(), " / ", nrow(volcanoes), " (", round((n() / nrow(volcanoes)) * 100), "%)"),
    `Height (ft)` = sum(`Height (ft)`, na.rm = TRUE),
    Age = as.integer(mean(Age, na.rm = TRUE)),
    Volcanoes = "Mean Age / Total Height"
  )
# The summary row has no visit date, so `arrange()` sorts it (NA) to the bottom
knitr::kable(
  bind_rows(bigvisited, summaryvisited) %>%
    arrange(`Date visited`, Volcanoes)
)
Volcanoes | Date visited | Age | Height (ft) | Location |
---|---|---|---|---|
Maungauika / North Head | 2023-07-11 | 87500 | 160 | -36.827751; 174.81205 |
Takarunga / Mount Victoria | 2023-07-11 | 34800 | 285 | -36.8266; 174.7990 |
Albert Park Volcano | 2023-07-17 | 145000 | NA | -36.8486; 174.7673 |
Maungakiekie / One Tree Hill | 2023-07-22 | 67000 | 597 | -36.90000; 174.78306 |
Te Kōpuke / Tītīkōpuke / Mount St John | 2023-07-22 | 75300 | 413 | -36.883431; 174.780196 |
Ōhinerau / Mount Hobson | 2023-07-22 | 34200 | 469 | -36.877814; 174.786156 |
Ohuiarangi / Pigeon Mountain | 2023-07-30 | 23400 | 180 | -36.888846; 174.903116 |
Onepoto | 2023-08-03 | 187600 | 151 | -36.80818; 174.75085 |
Te Kopua-o-Matakamokamo / Tank Farm / Tuff Crater | 2023-08-03 | 181000 | 151 | -36.8020; 174.7533 |
Grafton Volcano | 2023-08-05 | 106500 | 269 | -36.858440; 174.763624 |
Maungarei / Mount Wellington | 2023-08-27 | 10000 | 443 | -36.89306; 174.846556 |
Taurere / Taylors Hill | 2023-08-27 | 30200 | 184 | -36.864223; 174.869943 |
Whakamuhu / Saint Heliers / Glover Park | 2023-08-27 | 161000 | 213 | -36.846911; 174.867662 |
Maungawhau / Mount Eden | 2023-09-01 | 28000 | 643 | -36.877; 174.764 |
Takaroro / Mount Cambria | 2023-09-17 | 42300 | 98 | -36.824444; 174.801933 |
Te Pou Hawaiki | 2023-09-27 | 28000 | 312 | -36.88247; 174.766726 |
Te Tātua-a-Riukiuta / Three Kings | 2023-10-13 | 31000 | 436 | -36.902926; 174.754651 |
Māngere Lagoon | 2023-10-14 | 59500 | 66 | -36.95702; 174.77763 |
Te Hopua-a-Rangi / Gloucester Park | 2023-10-14 | 31000 | 39 | -36.9295; 174.784734 |
Te Pane-o-Mataaho / Māngere Mountain | 2023-10-14 | 59000 | 348 | -36.9496; 174.7831 |
Pukekawa / Auckland Domain | 2023-10-26 | 106000 | 253 | -36.859158; 174.775808 |
Maungarahiri / Little Rangitoto | 2023-11-04 | 24600 | 246 | -36.875407; 174.809636 |
Ōrākei Basin | 2023-11-04 | 126000 | 177 | -36.867124; 174.81308 |
Ash Hill | 2023-11-06 | 31800 | 98 | -37.002754; 174.867545 |
Matukutureia / McLaughlins Mountain | 2023-11-06 | 48200 | 240 | -37.013511; 174.845974 |
Puhinui Craters | 2023-11-06 | NA | 79 | -37.01465; 174.83296 |
Wiri Mountain / Matukutūruru | 2023-11-06 | 30550 | 260 | -37.007334; 174.858441 |
Pukewīwī / Puketāpapa / Mount Roskill | 2023-11-13 | 105300 | 360 | -36.912286; 174.737371 |
Ōwairaka / Te Ahi-kā-a-Rakataura / Mount Albert | 2023-11-13 | 119200 | 443 | -36.890475; 174.720097 |
Mean Age / Total Height | NA | 71926 | 7613 | n = 29 / 53 (55%) |
Tracker last updated 2023-11-19.
Version 1.3: added to-visit CSV generator for maps.
You know how I mentioned wanting to have a nice little export into google or something to easily know which volcanoes I still had to visit? Now is the time!
# Split the "lat; long" strings in `Location` into separate columns. The pieces
# are trimmed because the text after ";" otherwise keeps its leading space.
volcanoes <- volcanoes %>%
  mutate(
    Lat = str_trim(str_split_i(Location, ";", 1)),
    Long = str_trim(str_split_i(Location, ";", 2))
  )
# Volcanoes with no match in `visited` get NA in `Visited` after the left join;
# keep only those rows and export them as the to-visit list
left_join(volcanoes, visited, by = "Volcanoes") %>%
  filter(is.na(Visited)) %>%
  write_csv("tovisit.csv")
And, now that we have that with Lat and Long specified in columns, we can import it into a Google My Map (because you can’t with normal gmaps?). A neat thing I found is that you can re-import specific layers, which is nice once a couple of volcanoes are ticked off the list.
The source code for this project is available at https://github.com/legallyahc/volcanic, with all work done right in this RMarkdown file (`dataset.rmd`) :3.
This project is based on information from Wikipedia which was authored by Wikipedia contributors and is licensed under CC-BY-SA 4.0. This project is licensed under the GNU General Public License v3 or later, a compatible license with CC-BY-SA 4.0.
Originally published on 2023-07-16. Version 1.3.1.