SQL 招聘网站岗位数据分析

news/2025/2/13 21:27:01/

数据清洗

1.删除包含空字段的行

create view v_data_clean_null as
select * from `data` d where 
job_href is not null and job_href != '' and
job_name is not null and job_name != '' and
company_href is not null and company_href != '' and
company_name is not null and company_name != '' and
providesalary_text is not null and providesalary_text != '' and
workarea is not null and workarea != '' and
workarea_text is not null and workarea_text != '' and
companytype_text is not null and companytype_text != '' and
degreefrom is not null and degreefrom != '' and
workyear is not null and workyear != '' and
updatedate is not null and updatedate != '' and
issuedate is not null and issuedate != '' and
parse2_job_detail is not null and parse2_job_detail != '';

2.按照企业和岗位进行去重保留最新一条

-- 去掉排序字段,把需要的字段都输入一遍
create view v_data_clean_distinct as
with p as 
(select *,
row_number () over (partition by company_name,job_name order by issuedate) as row1
from v_data_clean_null )
select id,job_href,job_name,company_href,company_name,providesalary_text,workarea,workarea_text,updatedate,companytype_text,degreefrom,workyear,issuedate,parse2_job_detail
from p where row1=1;

3.筛选招聘地区在北上广深

-- 过滤招聘地区
create view v_data_clean_workplace as 
select * from 
(select *,
case when workarea_text like '%北京%' then '北京' when workarea_text like '%上海%' then '上海' when workarea_text like '%广州%' then '广州' when workarea_text like '%深圳%' then '深圳' 
end as workplace
from v_data_clean_distinct ) a where a.workplace is not null;

4.过滤周边岗位保留任职要求包含数据的岗位

create view v_data_clean_jobname as
select * from v_data_clean_workplace vdcw where job_name like '%数据%';
-- 最终清洗结果
create view v_data_clean as
(select * from v_data_clean_jobname);

市场需求量

需求1:按照城市分组统计招聘总量和招聘职位数

create view v_data_market_demand as
select workplace as '城市',
sum(degreefrom) as '招聘总量',
count(*) as '职位数'
from v_data_clean group by workplace;

 

就业企业类型分布

需求2:按照企业类型进行招聘量的统计及招聘占比计算

create view v_data_companytype_degree as
select companytype_text as '企业类型',
companytype_degreefrom as '招聘总量',
concat(cast(companytype_degreefrom /sum_degreefrom*100 as decimal(4,2)),'%') as '招聘占比'  
from 
(select companytype_text ,
sum(degreefrom) as companytype_degreefrom 
from v_data_clean group by companytype_text) f1,
(select sum(degreefrom) as sum_degreefrom from v_data_clean) f2
order by companytype_degreefrom desc;

岗位薪资

需求3:计算岗位薪资的单位,最大,最小值,均值

create view v_data_salary_min_max_mean as
with p as
(select * ,
cast(
(case when unit=10000 then substring_index(substring_index(providesalary_text,'万/月',1),'-',1)when unit=1000 then substring_index(substring_index(providesalary_text,'千/月',1),'-',1)when unit=833 then substring_index(substring_index(providesalary_text,'万/年',1),'-',1)
end ) as decimal(10,2))*unit as salary_min,
cast(
(case when unit=10000 then substring_index(substring_index(providesalary_text,'万/月',1),'-',-1)when unit=1000 then substring_index(substring_index(providesalary_text,'千/月',1),'-',-1)when unit=833 then substring_index(substring_index(providesalary_text,'万/年',1),'-',-1)
end ) as decimal(10,2))*unit as salary_max
from v_data_salary_unit) 
select *,cast((salary_min+salary_max)/2 as decimal(10,2)) as salary_mean from p;

 需求4:按照工作年限分组,计算各组平均薪资

create view v_data_workyear_salary as
select workyear as '工作年限',
avg(salary_mean) as '平均薪资'
from v_data_salary_min_max_mean 
group by workyear 
order by length(workyear),workyear ;

需求5:按照企业类型分组,计算各组平均薪资

create view v_data_companytype_salary as
select companytype_text  as '企业类型',
avg(salary_mean) as '平均薪资'
from v_data_salary_min_max_mean 
group by companytype_text 
order by avg(salary_mean) desc ;

 

岗位核心技能

需求6:查询技能点在招聘任职要求中出现的次数及前30

create view v_data_skill_quantity as
select st.skill ,
count(*) as quantity
from v_data_clean v 
inner join skill_table st on v.parse2_job_detail like concat('%',st.skill,'%')
group by st.skill 
order by quantity desc limit 30;

需求7:计算各个技能点出现的频率

create view v_data_skill as
select skill as '技能点',
quantity as '出现频数',
concat(cast(quantity/total_quantity*100 as decimal(10,2)),'%') as '出现频率'
from v_data_skill_quantity ,(select count(*) as total_quantity from v_data_clean ) as f;

 总结

1.上海对于数据分析师需求最大

2.数据分析师在工作第5年薪资即可翻倍

3.民营企业对数据分析师需求最大

4.SQL,大数据,EXCEL,报告撰写等是数据分析岗位中普遍的要求


http://www.ppmy.cn/news/59963.html

相关文章

Golang-map底层原理刨析

map底层原理刨析 [外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-yz8FjpJt-1683362716010)(https://gitee.com/cqfbest/md/raw/master/img2//v2-95697e3e2584722431ab1c37f3a61736_1440w.jpg)] Go 语言内置了 map 数据结构, map 的底层便是一个 Has…

解决echarts图表随窗口宽度变化而改变图表的大小

文章目录 前言一、演示前后对比效果二、解決方法1.在代码结尾加上监听方法2.示例 三、总结扩展问题 前言 很多同学在使用echarts时遇到了浏览器窗口大小发生变化时,图表大小没有自适应窗口的宽度,下面我将对比演示随着窗口大小变化,echarts图…

container的讲解

我们做开发经常会遇到这样的一个需求,要开发一个响应式的网站,但是我们需要我们的元素样式跟随着我们的元素尺寸大小变化而变化。而我们常用的媒体查询(Media Queries)检测的是视窗的宽高,根本无法满足我们的业务需求&…

urllib爬取图片

使用 urllib 库来爬取图片 import urllib.request# 图片的 URL 链接 image_url "http://img.netbian.com/file/2023/0415/235643ofSA0.jpg"# 获取图片并保存到指定路径 urllib.request.urlretrieve(image_url, "image.jpg")这段代码中,首先指定…

分析linux中动态库so文件的常用方法

前言 在linux系统中,我们经常会遇到各种各样的动态库文件,常见的是.so后缀,那么我们应该如何分析这些文件的用途和作用呢?毕竟我们不能一知半解的“搞事情”。 正文 查看文件属性 首先,我们从整体上了解一下该文件的基…

【Unity-UGUI控件全面解析】| Toggle 开关组件详解

🎬【Unity-UGUI控件全面解析】| Toggle 开关组件详解一、组件介绍二、组件属性面板三、代码操作组件四、组件常用方法示例4.1 监听开关事件五、组件相关扩展使用5.1 配合Toggle Group组使用💯总结🎬 博客主页:https://xiaoy.blog.csdn.net 🎥 本文由 呆呆敲代码的小Y …

车载搭载SystemUI音频技术,全方位呈现高品质音效

SystemUI概述 SystemUI 是 Android 操作系统中的一个系统服务,主要负责管理和显示系统界面元素,例如状态栏、通知栏、键盘和屏幕截图等。SystemUI 系统服务是系统级别的组件,提供的功能对于用户体验和系统安全性都非常重要。 SystemUI 的主…

DP练习题

1.减操作(ACWING) 若有 a b c d e f g 几个数, 先对位置d操作 变成 a b c d - e f g 再对c操作 变成 a b c - (d-e) f g 仔细分析后得出结论:对于第一个数如a, 它一定为正数,第二个数b,一定为负数&#…