souvik0306 committed on
Commit
34b54e8
·
1 Parent(s): 46b910e

pyarrow for faster Parquet operations

Browse files
Dataset_prep/parquet_conversion.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ # Use 'pyarrow' or 'fastparquet' for faster Parquet operations
4
+ parquet_engine = 'pyarrow' # or 'fastparquet'
5
+
6
+ # Specify data types for columns if known
7
+ dtype = {
8
+ 'column1': 'int64',
9
+ 'column2': 'float64',
10
+ 'column3': 'object',
11
+ # Add other columns as needed
12
+ }
13
+
14
+ # Load CSV data with specified data types
15
+ df = pd.read_csv('airport.csv', dtype=dtype)
16
+
17
+ # Save as Parquet using the specified engine
18
+ df.to_parquet('airport.parquet', engine=parquet_engine)
airport.parquet ADDED
Binary file (363 kB). View file
 
app.py CHANGED
@@ -4,9 +4,8 @@ from map_generator import *
4
  from flight_distance import *
5
  from optimize import *
6
  from weather import *
7
-
8
- # Load airport data and aircraft data from CSV files
9
- airport_df = pd.read_csv(r'airport.csv') # Adjust the path to your CSV file
10
  aircraft_df = pd.read_csv(r'aircraft.csv') # Adjust the path to your CSV file
11
 
12
  airport_options = [f"{row['IATA']} - {row['Airport_Name']}" for _, row in airport_df.iterrows()]
@@ -117,7 +116,7 @@ def check_route(airport_selections, aircraft_type):
117
  return result, map_html
118
 
119
  # Gradio Interface
120
- with gr.Blocks(theme='dark') as demo:
121
  gr.Markdown("## Airport Route Feasibility Checker")
122
 
123
  # Place components in two columns for results and map
 
4
  from flight_distance import *
5
  from optimize import *
6
  from weather import *
7
+ # Load airport data and aircraft data from Parquet and CSV files
8
+ airport_df = pd.read_parquet(r'airport.parquet') # Adjust the path to your Parquet file
 
9
  aircraft_df = pd.read_csv(r'aircraft.csv') # Adjust the path to your CSV file
10
 
11
  airport_options = [f"{row['IATA']} - {row['Airport_Name']}" for _, row in airport_df.iterrows()]
 
116
  return result, map_html
117
 
118
  # Gradio Interface
119
+ with gr.Blocks(theme=gr.themes.Default()) as demo:
120
  gr.Markdown("## Airport Route Feasibility Checker")
121
 
122
  # Place components in two columns for results and map
flight_distance.py CHANGED
@@ -24,7 +24,15 @@ def get_airport_lat_long(identifiers):
24
  Get latitude and longitude for a list of airport identifiers (IATA codes).
25
  """
26
  csv_file = 'airport.csv'
27
- df = pd.read_csv(csv_file)
 
 
 
 
 
 
 
 
28
  df_filtered = df[df['Airport_Name'].isin(identifiers) | df['IATA'].isin(identifiers)]
29
  lat_long_dict = {row['IATA']: (row['Lat'], row['Long']) for _, row in df_filtered.iterrows()}
30
  return lat_long_dict
 
24
  Get latitude and longitude for a list of airport identifiers (IATA codes).
25
  """
26
  csv_file = 'airport.csv'
27
+ parquet_file = 'airport.parquet'
28
+
29
+ # Try reading the parquet file first
30
+ try:
31
+ df = pd.read_parquet(parquet_file)
32
+ except FileNotFoundError:
33
+ # If parquet file is not found, fall back to reading the CSV file
34
+ df = pd.read_csv(csv_file)
35
+
36
  df_filtered = df[df['Airport_Name'].isin(identifiers) | df['IATA'].isin(identifiers)]
37
  lat_long_dict = {row['IATA']: (row['Lat'], row['Long']) for _, row in df_filtered.iterrows()}
38
  return lat_long_dict