“Transcendental-Programmer”
first commit
c3cc0a9
import random
import json
from datetime import datetime, timedelta
import os
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class JobRecommendationDataGenerator:
def __init__(self):
self.job_types = ['Electrical', 'Mechanical', 'Plumbing', 'Carpentry', 'Cleaning', 'IT']
self.urgency_levels = ['Critical', 'High', 'Medium', 'Low']
self.hostel_names = ['Hostel A', 'Hostel B', 'Hostel C', 'Hostel D']
self.availability_statuses = ['Available', 'Busy', 'On Leave']
def generate_location(self):
return {
"hostel_name": random.choice(self.hostel_names),
"floor_number": random.randint(0, 4),
"room_number": f"{random.randint(1, 4)}{random.randint(0, 9)}{random.randint(0, 9)}"
}
def generate_worker(self, department=None):
if department is None:
department = random.choice(self.job_types)
return {
"worker_id": f"W{random.randint(10000, 99999)}",
"department": department,
"current_workload": random.randint(0, 5),
"availability_status": random.choice(self.availability_statuses),
"job_success_rate": round(random.uniform(0.80, 0.99), 2),
"current_location": self.generate_location()
}
def generate_sample(self, index):
job_type = random.choice(self.job_types)
location = self.generate_location()
# Generate workers list with at least one matching department
workers = [self.generate_worker(job_type)] # Ensure one matching worker
workers.extend([self.generate_worker() for _ in range(random.randint(2, 4))])
sample = {
"job_id": f"J{60000 + index}",
"type": job_type,
"description": f"{job_type} issue in room {location['room_number']}.",
"urgency_level": random.choice(self.urgency_levels),
"submission_timestamp": (datetime.utcnow() - timedelta(minutes=random.randint(0, 60))).strftime("%Y-%m-%dT%H:%M:%SZ"),
"location": location,
"workers": workers
}
return sample
def generate_dataset(self, num_samples, output_path):
logger.info(f"Generating {num_samples} samples...")
dataset = []
for i in range(num_samples):
sample = self.generate_sample(i)
dataset.append(sample)
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Save to JSON file
with open(output_path, 'w') as f:
json.dump(dataset, f, indent=2)
logger.info(f"Generated {len(dataset)} samples and saved to {output_path}")
return dataset
def main():
generator = JobRecommendationDataGenerator()
# Generate training data
train_samples = generator.generate_dataset(
20000,
'models/job_recommendation/train_data/training_data.json'
)
# Generate test data
test_samples = generator.generate_dataset(
4000,
'models/job_recommendation/test_data/test_data.json'
)
logger.info(f"Generated {len(train_samples)} training samples and {len(test_samples)} test samples")
if __name__ == "__main__":
main()