import json
import logging
import os
import random
from datetime import datetime, timedelta, timezone


# Configure the root logger so INFO-level (and above) records are emitted.
logging.basicConfig(level=logging.INFO)

# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)


class JobRecommendationDataGenerator:
    """Generate synthetic job records, each with a list of candidate workers.

    Every sample describes one job (type, urgency, location, submission
    time) plus three to five workers. All random values come from the
    module-level ``random`` generator, so seeding it beforehand makes
    everything except the submission timestamp reproducible.
    """

    def __init__(self):
        """Set up the fixed vocabularies that samples are drawn from."""
        self.job_types = ['Electrical', 'Mechanical', 'Plumbing', 'Carpentry', 'Cleaning', 'IT']
        self.urgency_levels = ['Critical', 'High', 'Medium', 'Low']
        self.hostel_names = ['Hostel A', 'Hostel B', 'Hostel C', 'Hostel D']
        self.availability_statuses = ['Available', 'Busy', 'On Leave']

    def generate_location(self) -> dict:
        """Return a random location.

        Returns:
            A dict with ``hostel_name`` (one of ``self.hostel_names``),
            ``floor_number`` (int in 0..4) and ``room_number`` (three-digit
            string whose first digit is 1..4).
        """
        return {
            "hostel_name": random.choice(self.hostel_names),
            "floor_number": random.randint(0, 4),
            "room_number": f"{random.randint(1, 4)}{random.randint(0, 9)}{random.randint(0, 9)}",
        }

    def generate_worker(self, department=None) -> dict:
        """Return a random worker record.

        Args:
            department: Department to assign. When ``None`` a random entry
                of ``self.job_types`` is used.

        Returns:
            A dict with ``worker_id`` ("W" + five digits), ``department``,
            ``current_workload`` (0..5), ``availability_status``,
            ``job_success_rate`` (0.80..0.99, two decimals) and
            ``current_location``.
        """
        if department is None:
            department = random.choice(self.job_types)

        return {
            "worker_id": f"W{random.randint(10000, 99999)}",
            "department": department,
            "current_workload": random.randint(0, 5),
            "availability_status": random.choice(self.availability_statuses),
            "job_success_rate": round(random.uniform(0.80, 0.99), 2),
            "current_location": self.generate_location(),
        }

    def generate_sample(self, index) -> dict:
        """Return one job sample with its candidate workers.

        Args:
            index: Offset added to 60000 to form the ``job_id``.

        Returns:
            A dict with ``job_id``, ``type``, ``description``,
            ``urgency_level``, ``submission_timestamp`` (UTC, formatted as
            ``YYYY-MM-DDTHH:MM:SSZ``, 0-60 minutes in the past),
            ``location`` and ``workers``.
        """
        job_type = random.choice(self.job_types)
        location = self.generate_location()

        # The first worker always belongs to the job's own department; the
        # remaining two to four come from random departments.
        workers = [self.generate_worker(job_type)]
        workers.extend(self.generate_worker() for _ in range(random.randint(2, 4)))

        urgency = random.choice(self.urgency_levels)
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted string is identical.
        submitted_at = datetime.now(timezone.utc) - timedelta(minutes=random.randint(0, 60))

        return {
            "job_id": f"J{60000 + index}",
            "type": job_type,
            "description": f"{job_type} issue in room {location['room_number']}.",
            "urgency_level": urgency,
            "submission_timestamp": submitted_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "location": location,
            "workers": workers,
        }

    def generate_dataset(self, num_samples, output_path) -> list:
        """Generate ``num_samples`` samples and write them to ``output_path`` as JSON.

        Args:
            num_samples: Number of samples to generate; samples are indexed
                from 0, so job ids start at ``J60000``.
            output_path: Destination file. Missing parent directories are
                created; a bare filename is written to the current
                directory.

        Returns:
            The list of generated sample dicts (also written to disk).
        """
        logger.info("Generating %s samples...", num_samples)
        dataset = [self.generate_sample(i) for i in range(num_samples)]

        # dirname() is '' for a bare filename and os.makedirs('') raises
        # FileNotFoundError, so only create directories when there is one.
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(dataset, f, indent=2)

        logger.info("Generated %s samples and saved to %s", len(dataset), output_path)
        return dataset


def main():
    """Generate the training and test datasets and write them to disk.

    Writes 20000 samples to the training-data path and 4000 samples to the
    test-data path (both relative to the current working directory), then
    logs the two counts.

    NOTE(review): each ``generate_dataset`` call numbers its samples from 0,
    so the two files share job ids starting at ``J60000`` - confirm this
    overlap is acceptable for downstream use.
    """
    generator = JobRecommendationDataGenerator()

    train_path = 'models/job_recommendation/train_data/training_data.json'
    test_path = 'models/job_recommendation/test_data/test_data.json'

    train_samples = generator.generate_dataset(20000, train_path)
    test_samples = generator.generate_dataset(4000, test_path)

    logger.info(f"Generated {len(train_samples)} training samples and {len(test_samples)} test samples")


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()