1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
| import pandas as pd import random
# Codes stored in the 'employment_status' column (see the header label
# filtered out below: "1=employed, 0=unemployed").
EMPLOYED = 1
UNEMPLOYED = 0

# Read the 'data' sheet without treating any row as a header. The context
# manager closes the workbook handle once parsing is done; the original
# left it open for the lifetime of the process.
with pd.ExcelFile('data.xls') as excel_file:
    df = excel_file.parse('data', header=None)

# Column labels applied positionally to the header-less sheet.
df.columns = [
    'id', 'person_id', 'name', 'sex', 'birthday', 'age', 'nation',
    'marriage', 'edu_level', 'politic', 'reg_address', 'profession',
    'religion', 'hukou_type', 'hukou_code', 'hukou_name', 'culture_level',
    'grad_school', 'grad_date', 'major_code', 'major_name', 'person_type',
    'military_status', 'is_disability', 'is_teen', 'is_elder',
    'change_type', 'is_living_alone', 'live_status', 'remark',
    'employment_id', 'employer_id', 'employment_date', 'is_contract',
    'is_social_insurance', 'contract_start', 'contract_end',
    'industry_code', 'employer', 'unemployment_audit_date',
    'unemployment_id', 'unemployment_date', 'unemployment_reason',
    'original_job_code', 'original_job_name', 'original_employment_form',
    'job_willingness', 'training_willingness',
    'unemployment_register_date', 'unemployment_cancel_date',
    'unemployment_status', 'unemployment_type', 'original_company',
    'is_unemployment_insurance', 'employment_status', 'n',
]

# Drop the first two rows (presumably the sheet's own header rows -- confirm
# against data.xls) and renumber the remaining rows from zero.
df = df.iloc[2:].reset_index(drop=True)

# Remove any remaining row whose status cell is the column's header label.
# .copy() makes the result an independent frame so the column assignment
# below does not write into a slice of the previous frame.
invalid_value = 'Employment status (1=employed, 0=unemployed)'
df = df[df['employment_status'] != invalid_value].copy()

# Normalise the status column to stripped strings so it can be compared
# against the string form of the status codes.
print("Original data type of 'employment_status' column:", df['employment_status'].dtype)
df['employment_status'] = df['employment_status'].astype(str).str.strip()
print("Unique values of 'employment_status' column after cleaning:", df['employment_status'].unique())
print("Data type of 'employment_status' column after cleaning:", df['employment_status'].dtype)

# Rows without an age or education level cannot be given advice.
df = df.dropna(subset=['age', 'edu_level'])

# Independent copy of the unemployed rows; a column is added to it later.
unemployed_data = df[df['employment_status'] == str(UNEMPLOYED)].copy()
# Major (discipline) code -> English discipline name.
# Keys are strings; entries are listed in ascending numeric order and grouped
# by their leading discipline prefix. Codes absent from this table are
# expected to be handled by the caller's lookup default.
MAJOR_MAPPING = {
    # 01xxxx
    '10000': 'Philosophy',
    '10100': 'Philosophy',
    '10200': 'Marxist Theory',
    # 02xxxx
    '20000': 'Economics',
    '20100': 'Economics',
    '20200': 'Business Administration',
    # 03xxxx
    '30000': 'Law',
    '30100': 'Law',
    '30200': 'Sociology',
    '30300': 'Political Science',
    '30400': 'Public Security',
    # 04xxxx
    '40000': 'Education',
    '40100': 'Education',
    '40200': 'Ideological and Political Education',
    '40300': 'Physical Education',
    '40400': 'Vocational and Technical Education',
    # 05xxxx
    '50000': 'Literature',
    '50100': 'Chinese Language and Literature',
    '50200': 'Foreign Languages and Literatures',
    '50300': 'Journalism',
    '50400': 'Arts (I)',
    '50500': 'Arts (II)',
    # 06xxxx
    '60000': 'History',
    '60100': 'History',
    '60200': 'Library, Information and Archives Science',
    # 07xxxx
    '70000': 'Science',
    '70100': 'Mathematics',
    '70200': 'Physics',
    '70300': 'Chemistry',
    '70400': 'Biological Sciences',
    '70500': 'Astronomy',
    '70600': 'Geology',
    '70700': 'Geographical Sciences',
    '70800': 'Geophysics',
    '70900': 'Atmospheric Sciences',
    '71000': 'Marine Sciences',
    '71100': 'Mechanics',
    '71200': 'Information and Electronic Sciences',
    '71300': 'Materials Science',
    '71400': 'Environmental Sciences',
    '71500': 'Psychology',
    '71600': 'Science and Technology Information and Management',
    # 08xxxx
    '80000': 'Engineering',
    '80100': 'Geology and Mining',
    '80200': 'Materials',
    '80300': 'Mechanical Engineering',
    '80400': 'Instrumentation',
    '80500': 'Thermal Energy and Nuclear Energy',
    '80600': 'Electrical Engineering',
    '80700': 'Electronics and Information',
    '80800': 'Civil Engineering',
    '80900': 'Hydraulic Engineering',
    '81000': 'Surveying and Mapping',
    '81100': 'Environment',
    '81200': 'Chemical Engineering and Pharmacy',
    '81300': 'Light Industry, Food and Grain',
    '81400': 'Agricultural Engineering',
    '81500': 'Forestry Engineering',
    '81600': 'Textile',
    '81700': 'Transportation',
    '81800': 'Aeronautics and Astronautics',
    '81900': 'Weaponry',
    '82000': 'Public Security Technology',
    '82100': 'Engineering Mechanics',
    '82200': 'Management Engineering',
    # 09xxxx
    '90000': 'Agriculture',
    '90100': 'Plant Production',
    '90200': 'Forest Resources',
    '90300': 'Environmental Protection',
    '90400': 'Animal Production and Veterinary Medicine',
    '90500': 'Fisheries',
    '90600': 'Management',
    '90700': 'Agricultural Extension',
    # 10xxxx
    '100000': 'Medicine',
    '100100': 'Basic Medicine',
    '100200': 'Preventive Medicine',
    '100300': 'Clinical Medicine and Medical Technology',
    '100400': 'Stomatology',
    '100500': 'Traditional Chinese Medicine',
    '100600': 'Forensic Medicine',
    '100700': 'Nursing',
    '100800': 'Pharmacy',
    '100900': 'Management',
    # 11xxxx
    '110000': 'Military Science',
}
# (keyword looked for in the major name, jobs to pick from) for people under
# 35 whose education code is in [20, 40). First matching keyword wins.
_EDU_20_TO_39_YOUNG_JOBS = (
    ('Economics', ["Marketer", "Salesperson", "Business analyst", "Financial analyst"]),
    ('Chemistry', ["Chemical process operator", "Chemical product analyst"]),
    ('Engineering', ["Mechanical technician", "Electrical technician", "Equipment maintenance technician"]),
    ('Biological Sciences', ["Food quality inspector", "Biomedical research assistant"]),
    ('Languages', ["Tour guide", "Tourism marketing specialist"]),
)

# Same idea for people under 35 whose education code is 40 or above.
_EDU_40_PLUS_YOUNG_JOBS = (
    ('Chemistry', ["Chemical researcher", "Chemical process engineer"]),
    ('Biological Sciences', ["Biomedical researcher", "Drug development scientist"]),
    ('Engineering', ["Mechanical engineer", "Electrical engineer", "Equipment design engineer"]),
    ('Management', ["Production manager", "Project manager", "Business manager"]),
)

# Marriage codes that select the alternative job in the 35-49 age band.
# NOTE(review): the meaning of codes '20' and '40' is not visible here --
# confirm against the data dictionary.
_MARRIAGE_CODES = ('20', '40')


def _normalize_code(value):
    """Return a spreadsheet cell as a stripped string code.

    Excel cells may arrive as ``int``, ``float`` or ``str``. Integral floats
    lose their fractional part (``20.0`` -> ``'20'``); ``None`` and NaN
    become ``''`` so they never match a real code.
    """
    if value is None:
        return ''
    if isinstance(value, float):
        if value != value:  # NaN is the only float not equal to itself
            return ''
        if value.is_integer():
            return str(int(value))
    return str(value).strip()


def _advice_edu_below_20(age, marriage, is_disability):
    """Return the job suggestion for education codes below 20."""
    if age < 35:
        if is_disability == '0':
            return random.choice(
                ["Housekeeper", "Factory worker", "Warehouse laborer", "Agricultural worker"]
            )
        return "Handicraft worker"
    if age < 50:
        return "Farmer" if marriage in _MARRIAGE_CODES else "Factory worker"
    return "Community cleaner" if is_disability == '0' else "Handicraft sorter"


def _advice_edu_20_to_39(age, marriage, is_disability, major):
    """Return the job suggestion for education codes in [20, 40)."""
    if age < 35:
        for keyword, jobs in _EDU_20_TO_39_YOUNG_JOBS:
            if keyword in major:
                return random.choice(jobs)
        return random.choice(
            ["Content writer", "Graphic designer", "Data entry clerk", "Event planner"]
        )
    if age < 50:
        if marriage in _MARRIAGE_CODES:
            return "Agricultural processor" if 'Agriculture' in major else "Warehouse manager"
        # The original branched on 'Engineering' here, but both arms gave
        # the same job, so the branch is collapsed.
        return "Industrial operator"
    return "Elderly caregiver" if is_disability == '0' else "Handmade seller"


def _advice_edu_40_plus(age, major):
    """Return the job suggestion for education codes of 40 and above."""
    if age < 35:
        for keyword, jobs in _EDU_40_PLUS_YOUNG_JOBS:
            if keyword in major:
                return random.choice(jobs)
        if 'Languages' in major:
            return "Tourism marketing specialist"
        return "Financial consultant"
    if age < 50:
        return "Production manager" if 'Management' in major else "Technical expert"
    return "Part - time trainer"


def give_employment_advice(row):
    """Suggest a job for one person record.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'edu_level', 'major_code', 'age', 'marriage', 'is_disability'
            and 'military_status'. Code values may be strings or numbers;
            they are normalised to stripped strings before comparison, so
            a numeric Excel cell ``0`` matches the code ``'0'``.

    Returns:
        The suggestions joined by a single space. ``"Security guard"`` is
        appended when 'military_status' is ``'1'``. If age or education
        cannot be converted to ``int`` the data-error message is returned;
        any other failure (such as a missing key) yields an
        ``"Unexpected error: ..."`` message instead of raising.

    NOTE(review): the tiers treat larger 'edu_level' codes as more advanced
    education; confirm that this matches the coding used in the data.
    """
    advice = []
    try:
        education_raw = row['edu_level']
        major_code = _normalize_code(row['major_code'])
        age = int(row['age'])
        marriage = _normalize_code(row['marriage'])
        is_disability = _normalize_code(row['is_disability'])
        military_status = _normalize_code(row['military_status'])
        education = int(education_raw)

        if education < 20:
            advice.append(_advice_edu_below_20(age, marriage, is_disability))
        else:
            # Only these tiers depend on the major, so resolve it here.
            if major_code == '990000':
                major = 'Other Disciplines'
            else:
                major = MAJOR_MAPPING.get(major_code, 'Other Disciplines')
            if education < 40:
                advice.append(_advice_edu_20_to_39(age, marriage, is_disability, major))
            else:
                advice.append(_advice_edu_40_plus(age, major))

        if military_status == '1':
            advice.append("Security guard")
    except ValueError:
        advice.append("Data error: unable to convert age or education level to integer.")
    except Exception as e:
        # Row-level boundary: report the problem in the output cell rather
        # than aborting the whole DataFrame.apply run.
        advice.append(f"Unexpected error: {str(e)}")

    return ' '.join(advice)
# Produce the advice report for the unemployed rows, or say that there are
# none to report on.
if unemployed_data.empty:
    print("No unemployed people data was filtered.")
else:
    # Compute one advice string per row (axis=1 passes each row to the function).
    unemployed_data['Employment Advice'] = unemployed_data.apply(
        give_employment_advice,
        axis=1,
    )

    # Keep only the identifying columns plus the advice, show them, and
    # save them to an Excel workbook without the index column.
    report_columns = ['id', 'name', 'Employment Advice']
    result = unemployed_data[report_columns]
    print(result)
    result.to_excel('unemployed_advice.xlsx', index=False)
|