#!/usr/bin/perl
use 5.34.0;

# Extracts oceans, seas and bays from the unzipped GeoNames allCountries.zip
# dump and writes them to a smaller tab-separated file with a header row.
#
# Input : allCountries.txt   (tab-separated, one GeoNames record per line)
# Output: hydro_features.txt (tab-separated, header + one line per feature)
# Dies if either file cannot be opened or the output cannot be flushed.
my $input_file  = 'allCountries.txt';
my $output_file = 'hydro_features.txt';

# GeoNames row layout (zero-based field index):
#    0 geonameid       1 name             2 asciiname      3 alternatenames
#    4 latitude        5 longitude        6 feature class  7 feature code
#    8 country code    9 cc2             10 admin1 code   11 admin2 code
#   12 admin3 code    13 admin4 code     14 population    15 elevation
#   16 dem            17 timezone        18 modification date
my $FEATURE_CLASS_IDX = 6;
my $FEATURE_CODE_IDX  = 7;

# Output columns in order, each paired with the source field index it is
# read from.  Keeping the name and the index side by side guarantees that
# the header row and the data rows cannot drift apart.
my @columns = (
    [ geonameid     => 0 ],
    [ name          => 1 ],
    [ feature_class => $FEATURE_CLASS_IDX ],
    [ feature_code  => $FEATURE_CODE_IDX ],
    [ latitude      => 4 ],
    [ longitude     => 5 ],
    [ country_code  => 8 ],
    [ admin1        => 10 ],
    # Previously read from index 17 (the timezone field), which did not
    # match the "admin2" column name; 11 is the admin2 code.
    [ admin2        => 11 ],
);
my @header_names  = map { $_->[0] } @columns;
my @field_indices = map { $_->[1] } @columns;

# Only these hydrographic feature codes are kept; lakes, rivers, etc. are
# deliberately excluded.
my %is_wanted_code = map { $_ => 1 } qw(OCN SEA BAY);

open my $in,  '<', $input_file  or die "Cannot open $input_file: $!";
open my $out, '>', $output_file or die "Cannot open $output_file: $!";

# The newline is appended after the join so that it is not treated as one
# more tab-separated element (which would leave a trailing tab on the line).
print {$out} join("\t", @header_names), "\n";

LINE:
while (my $line = <$in>) {
    chomp $line;

    # A limit of -1 keeps trailing empty fields, so the field count does not
    # depend on whether the last columns of a record happen to be empty.
    my @fields = split /\t/, $line, -1;

    # Keep only hydrographic features (class H) of the wanted codes.
    next LINE unless ($fields[$FEATURE_CLASS_IDX] // '') eq 'H';
    next LINE unless $is_wanted_code{ $fields[$FEATURE_CODE_IDX] // '' };

    # Short (malformed) rows yield undef slice elements; emit them as empty
    # columns rather than warning or dropping the record.
    my @selected = map { $_ // '' } @fields[@field_indices];

    print {$out} join("\t", @selected), "\n";
}

close $in;

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so this close must be checked.
close $out or die "Cannot close $output_file: $!";

print "Hydrographic features extracted to $output_file\n";
