Exploring biodiversity data is tidier than ever

Dax Kellie, Jenna Wraith, Martin Westgate

@daxkellie

2.1 billion records

in the Global Biodiversity Information Facility (GBIF)

  • Tanzania (TanBIF) - 1.5 million records
  • Brazil (SiBBr) - 23 million records
  • Sweden (SBDI) - 103 million records
  • Australia (ALA) - 112 million records

living-atlases.gbif.org

taxize & rgbif

taxize & rgbif

library(taxize)
as.tsn(get_tsn("Quercus douglasii"))
══  1 queries  ═══════════════
✔  Found:  Quercus douglasii
══  Results  ═════════════════

• Total: 1 
• Found: 1 
• Not Found: 0
[1] "19322"
attr(,"class")
[1] "tsn"
attr(,"match")
[1] "found"
attr(,"multiple_matches")
[1] FALSE
attr(,"pattern_match")
[1] FALSE
attr(,"uri")
[1] "https://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=19322"

taxize & rgbif

A simple example

library(rgbif)
occ_download(
  pred("taxonKey", 2436775), 
  pred("hasGeospatialIssue", FALSE),
  pred("hasCoordinate", TRUE),
  pred("occurrenceStatus","PRESENT"), 
  pred_not(pred_in("basisOfRecord",c("FOSSIL_SPECIMEN","LIVING_SPECIMEN"))),
  format = "SIMPLE_CSV"
)

taxize & rgbif

A not-so-simple example

occ_download(
type="and",
    pred("taxonKey", 2436775),
    pred("hasGeospatialIssue", FALSE),
    pred("hasCoordinate", TRUE),
    pred("occurrenceStatus","PRESENT"), 
    pred_gte("year", 1900),
    pred_not(pred_in("basisOfRecord",c("FOSSIL_SPECIMEN","LIVING_SPECIMEN"))),
  pred_or(
    pred("country","ZA"),
    pred("gadm","ETH")
    ),
  pred_or(
    pred_not(pred_in("establishmentMeans",c("MANAGED","INTRODUCED"))),
    pred_isnull("establishmentMeans")
    ),
  pred_or(  
    pred_lt("coordinateUncertaintyInMeters",10000),
    pred_isnull("coordinateUncertaintyInMeters")
    ),
format = "SIMPLE_CSV"
)

galah

galah

  • Query the ALA (and other national GBIF nodes)
  • Use tidy, pipe-able syntax

Build a query

Record counts

library(galah)

galah_call() |>
  galah_identify("Eolophus roseicapilla") |> # galahs
  atlas_counts()
# A tibble: 1 × 1
   count
   <int>
1 991859

galah_filter(), galah_group_by(), galah_select()

Record counts

galah_call() |>
  galah_identify("Eolophus roseicapilla") |>
  galah_filter(year >= 2010,
               dataResourceName == "iNaturalist Australia") |>
  atlas_counts()
# A tibble: 1 × 1
  count
  <int>
1  7148

Record counts

galah_call() |>
  galah_identify("Eolophus roseicapilla") |>
  galah_filter(year >= 2010,
               dataResourceName == "iNaturalist Australia") |>
  galah_group_by(year) |>
  atlas_counts()
# A tibble: 13 × 2
   year  count
   <chr> <int>
 1 2021   1931
 2 2020   1569
 3 2019    942
 4 2018    820
 5 2022    781
 6 2017    535
 7 2016    191
 8 2015    107
 9 2014     79
10 2013     62
11 2011     54
12 2012     41
13 2010     36

Record counts

galah_call() |>
  galah_identify("Cacatuidae") |> # cockatoos
  galah_filter(year >= 2019) |>
  galah_group_by(year, dataResourceName) |>
  atlas_counts()
# A tibble: 80 × 3
   year  dataResourceName                                count
   <chr> <chr>                                           <int>
 1 2021  eBird Australia                                248142
 2 2021  iNaturalist Australia                            7612
 3 2021  NSW BioNet Atlas                                 1419
 4 2021  Earth Guardians Weekly Feed                       927
 5 2021  SA Fauna (BDBSA)                                  300
 6 2021  NatureMapr                                        166
 7 2021  WildNet - Queensland Wildlife Data                153
 8 2021  ALA species sightings and OzAtlas                 118
 9 2021  Wildlife Watch NSC                                105
10 2021  Port Adelaide Enfield Flora & Fauna Monitoring     37
# … with 70 more rows

Species lists

galah_call() |>
  galah_identify("peramelidae") |> # bandicoots & bilbies
  galah_filter(year == 2021,
               cl22 == "New South Wales") |> # cl22 = states/territories id
  atlas_species()
# A tibble: 3 × 10
  kingdom  phylum   class    order      family genus species author species_guid
  <chr>    <chr>    <chr>    <chr>      <chr>  <chr> <chr>   <chr>  <chr>       
1 Animalia Chordata Mammalia Peramelem… Peram… Pera… Perame… Geoff… urn:lsid:bi…
2 Animalia Chordata Mammalia Peramelem… Peram… Isoo… Isoodo… (Goul… urn:lsid:bi…
3 Animalia Chordata Mammalia Peramelem… Peram… Isoo… Isoodo… (Shaw… urn:lsid:bi…
# … with 1 more variable: vernacular_name <chr>


Species occurrences

# Enter email
galah_config(email = "dax.kellie@csiro.au")

galah_call() |>
  galah_identify("peramelidae") |>
  galah_filter(year == 2021,
               cl22 == "New South Wales") |> # states/territories
  atlas_occurrences()
# A tibble: 2,516 × 7
   decimalLatitude decimalLongitude eventDate      scientificName taxonConceptID
             <dbl>            <dbl> <chr>          <chr>          <chr>         
 1           -37.4             150. 2021-07-07T14… Perameles nas… urn:lsid:biod…
 2           -37.4             150. 2021-07-06T14… Perameles nas… urn:lsid:biod…
 3           -37.4             150. 2021-01-19T13… Perameles nas… urn:lsid:biod…
 4           -37.4             150. 2021-03-30T13… Perameles nas… urn:lsid:biod…
 5           -37.3             150. 2021-12-15T13… Isoodon obesu… urn:lsid:biod…
 6           -37.2             150. 2021-10-12T13… Isoodon        urn:lsid:biod…
 7           -37.2             150. 2021-10-12T13… Isoodon obesu… urn:lsid:biod…
 8           -37.2             150. 2021-10-12T13… Isoodon obesu… urn:lsid:biod…
 9           -37.2             150. 2021-10-12T13… Perameles nas… urn:lsid:biod…
10           -37.2             150. 2021-11-23T13… Perameles nas… urn:lsid:biod…
# … with 2,506 more rows, and 2 more variables: recordID <chr>,
#   dataResourceName <chr>

Species occurrences

# Enter email
galah_config(email = "dax.kellie@csiro.au")

galah_call() |>
  galah_identify("peramelidae") |>
  galah_filter(year == 2021,
               cl22 == "New South Wales") |> # states/territories
  galah_select(scientificName, eventDate) |>
  atlas_occurrences()
# A tibble: 2,516 × 2
   scientificName    eventDate           
   <chr>             <chr>               
 1 Isoodon           2021-06-19T14:00:00Z
 2 Isoodon           2021-10-12T13:00:00Z
 3 Isoodon           2021-01-20T13:00:00Z
 4 Isoodon           2021-04-07T14:00:00Z
 5 Isoodon           2021-01-20T13:00:00Z
 6 Isoodon           2021-07-04T14:00:00Z
 7 Isoodon           2021-01-20T13:00:00Z
 8 Isoodon           2021-10-20T13:00:00Z
 9 Isoodon macrourus 2021-03-27T13:00:00Z
10 Isoodon macrourus 2021-03-28T13:00:00Z
# … with 2,506 more rows

Plot species occurrences

library(galah)
library(ozmaps)
library(sf)
library(ggplot2)

# Enter email
galah_config(email = "dax.kellie@csiro.au")

# Download species occurrences
obs <- galah_call() |>
  galah_identify("peramelidae") |>
  galah_filter(year == 2021) |>
  atlas_occurrences()

# Ensure map uses correct projection
oz_wgs84 <- ozmap_data(data = "country") |>
  st_transform(crs = st_crs("WGS84"))

# Map points
ggplot(data = obs) + 
  geom_sf(data = oz_wgs84, 
          fill = "white") +
  geom_point(aes(x = decimalLongitude,
                 y = decimalLatitude), 
             color = "#78cccc") +
  theme_void()

Plot species occurrences

Query other Living Atlases from galah?

GBIF.ES

galah_config(atlas = "Spain")

galah_call() |> atlas_counts()
# A tibble: 1 × 1
     count
     <int>
1 36237636

Biodiversitäts-Atlas Österreich

galah_config(atlas = "Austria")

galah_call() |> atlas_counts()
# A tibble: 1 × 1
    count
    <int>
1 7786013

Share ways to code

atlasoflivingaustralia.github.io/galah

labs.ala.org.au

labs.ala.org.au

Downloading biodiversity data is tidier than ever

  • galah makes downloading data feel like wrangling data with dplyr

  • The package architecture is flexible enough to support other biodiversity databases




Thank you

Search fields

How to search for fields to filter by

# text search to find state/territory field id
search_fields("australian states") # id = cl22
# A tibble: 2 × 4
  id     description                                                 type  link 
  <chr>  <chr>                                                       <chr> <chr>
1 cl2013 ASGS Australian States and Territories Australian Statisti… laye… http…
2 cl22   Australian States and Territories Australian States and Te… laye… http…
search_field_values("cl22")
# A tibble: 11 × 2
   field category                    
   <chr> <chr>                       
 1 cl22  New South Wales             
 2 cl22  Victoria                    
 3 cl22  Queensland                  
 4 cl22  South Australia             
 5 cl22  Western Australia           
 6 cl22  Australian Capital Territory
 7 cl22  Northern Territory          
 8 cl22  Tasmania                    
 9 cl22  Unknown1                    
10 cl22  Ashmore and Cartier Islands 
11 cl22  Coral Sea Islands