forked from tsunghao-huang/Python-Ports-Distance-Calculator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter-ports.py
executable file
·69 lines (54 loc) · 2.58 KB
/
filter-ports.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os
import re
import pandas as pd
# Resolve the directory that contains this script so the data files are
# located relative to the script rather than to the caller's working directory.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))

# Load the UN/LOCODE table that holds the port coordinates.
# keep_default_na=False: by default pandas parses the literal text "NA" as a
# missing value. "NA" is also the ISO 3166 country code of Namibia, so the
# default would turn that 'Country' value into NaN and produce a NaN 'Code'
# for every Namibian port. With the default NA list disabled, empty cells are
# read as '' instead of NaN.
df = pd.read_csv(
    os.path.join(__location__, 'raw-data/UN-LOCODE.csv'),
    low_memory=False,
    keep_default_na=False,
)
# Pattern for a coordinate string such as "5231N 00445E":
# DDMM + N/S, optional whitespace, DDDMM + E/W. Compiled once at import time.
_COORDINATES_PATTERN = re.compile(
    r'(\d{2})(\d{2})([NS])\s*(\d{3})(\d{2})([EW])')


def split_coordinates(coord):
    """Convert a 'DDMMN DDDMME' coordinate string to decimal degrees.

    Args:
        coord: Coordinate text (degrees and minutes with a hemisphere
            letter for each axis), or a missing value.

    Returns:
        A ``(longitude, latitude)`` tuple of floats in decimal degrees,
        negative for the W and S hemispheres. ``(None, None)`` is returned
        when ``coord`` is missing, blank, or does not match the expected
        format.
    """
    if pd.isna(coord):
        return None, None

    # Strip before matching as well as before the emptiness test, so that a
    # valid value padded with whitespace is parsed instead of rejected.
    text = coord.strip()
    if not text:
        return None, None

    match = _COORDINATES_PATTERN.match(text)
    if not match:
        return None, None

    lat_deg, lat_min, lat_dir, lon_deg, lon_min, lon_dir = match.groups()

    # Degrees plus minutes/60; southern latitudes are negative.
    latitude = int(lat_deg) + int(lat_min) / 60.0
    if lat_dir == 'S':
        latitude = -latitude

    # Degrees plus minutes/60; western longitudes are negative.
    longitude = int(lon_deg) + int(lon_min) / 60.0
    if lon_dir == 'W':
        longitude = -longitude

    # Note the order: longitude first, then latitude.
    return longitude, latitude
# Parse every 'Coordinates' entry once, then unzip the resulting
# (longitude, latitude) pairs into the two new columns.
coordinate_pairs = df['Coordinates'].apply(split_coordinates)
longitudes, latitudes = zip(*coordinate_pairs)
df['Longitude'] = longitudes
df['Latitude'] = latitudes

# Build the 'Code' column by joining 'Country' and 'Location'.
df['Code'] = df['Country'] + df['Location']

# Keep only the rows for which both coordinates could be parsed.
has_longitude = df['Longitude'].notna()
has_latitude = df['Latitude'].notna()
df = df[has_longitude & has_latitude]
# # Filter records for sea ports (Function = '1')
# seaports_df = df[df['Function'].str.contains('1', na=False)]
# seaports_result = seaports_df[['Code', 'Name', 'NameWoDiacritics', 'Function', 'Longitude', 'Latitude']]
# seaports_result.sort_values(by='Code')
# seaports_result.to_csv(os.path.join(__location__, 'ports-sea.csv'), index=False)
# print("Sea port data has been saved to 'ports-sea.csv'")
# # Filter records for river ports (Function = '6')
# riverports_df = df[df['Function'].str.contains('6', na=False)]
# riverports_result = riverports_df[['Code', 'Name', 'NameWoDiacritics', 'Function', 'Longitude', 'Latitude']]
# riverports_result.sort_values(by='Code')
# riverports_result.to_csv(os.path.join(__location__, 'ports-river.csv'), index=False)
# print("River port data has been saved to 'ports-river.csv'")
# Filter records for all ports: keep rows whose 'Function' text contains
# '1' (sea port) or '6' (river port); rows with a missing 'Function' are
# excluded via na=False.
all_df = df[df['Function'].str.contains('1|6', na=False)]
ports_result = all_df[['Code', 'Name', 'NameWoDiacritics',
                       'Function', 'Longitude', 'Latitude']]
# sort_values returns a new DataFrame rather than sorting in place, so the
# result must be kept for the CSV to be written in 'Code' order.
ports_result = ports_result.sort_values(by='Code')
ports_result.to_csv(os.path.join(__location__, 'ports.csv'), index=False)
print("Ports data have been saved to 'ports.csv'")