DXGI高帧率屏幕录像软件源码解析（声音捕获，抓屏，ffmpeg录像，MP4录像，flv录像，麦克风采集）（第4篇编码，录像部分）

本文DEMO源码下载：

https://download.csdn.net/download/xjb2006/85109025

dxgi桌面屏幕录像（windows屏幕录像，硬件编码，声音捕获，音视频同步）

由于篇幅有限，分为4篇发表：

1、SDK接口一览：

2、声音采集部分:

3、屏幕捕获部分：

4、编码，录像部分：

距离上篇文章已经过了快1年了，才有时间把正式DEMO传上来，直接上个截图看看吧：

该DEMO演示了win10屏幕录像的核心功能，包含音源选择（支持麦克风，计算机声音和2者混合），屏幕选择（主副屏选择），鼠标，帧率，码率，硬件编码，实时预览，双录制（同时录制为flv,mp4）等基本功能。为了扩展需要，程序核心模块做成DLL动态库，可以多语言扩展，适用于C++，C#，JAVA，VB，Python等等其他语言。

视频编码：

DXGI输出一般是RGB32，需要将颜色空间转换为YV12或者NV12等才能进行编码，现支持libx264，h264_qsv，h264_nvenc，至于h264_amf，由于暂无测试环境，无法确认是否能用。

颜色空间转换采用libyuv或者ipp进行高效转换，经比较，效率较ffmpeg更高。

优先选择硬件加速（GPU）编码时，会自动判断是否有h264_qsv，h264_nvenc，如果没有则选择libx264，当然也可以手动指定编码器。

将RGB32进行缩放的libyuv算法：

// Scales a 32-bit-per-pixel image of scx x scy pixels into a dcx x dcy
// destination using libyuv's linear filter. Both buffers are addressed with a
// stride of width * 4 bytes, i.e. rows are taken to be tightly packed.
// The status code returned by libyuv::ARGBScale is intentionally discarded.
static void ResizeRGB32(BYTE *pIn,int scx,int scy,BYTE *pOut,int dcx,int dcy)
{
    const int srcStride = scx * 4;
    const int dstStride = dcx * 4;
    (void)libyuv::ARGBScale(pIn, srcStride, scx, scy,
                            pOut, dstStride, dcx, dcy,
                            libyuv::kFilterLinear);
}

验证编码器的函数：

bool CFFFindEncoder::Find(char* szName)
{
   //MessageBox(0,L"1",0,0);
   AVCodecContext *c= NULL;
//AVFrame *frame;
AVPacket *pkt;
   AVCodec *codec=avcodec_find_encoder_by_name(szName);
   if(codec==0)
       return false;
   //MessageBox(0,L"2",0,0);
c = avcodec_alloc_context3(codec);
if (!c)
   {
       return false;
}
   //MessageBox(0,L"3",0,0);
pkt = av_packet_alloc();
if (!pkt)
   {
       return false;
}
c->bit_rate = 8000000;
c->width = 1920;
c->height = 1080;
   c->time_base.num=1;
c->time_base.den=25;
   c->framerate.num=25;
c->framerate.den=1;

c->gop_size = 10;
c->max_b_frames = 0;
c->pix_fmt = AV_PIX_FMT_NV12;//AV_PIX_FMT_YUV420P;

if (codec->id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);

int ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
return false;
}
//MessageBox(0,L"4",0,0);

avcodec_free_context(&c);
av_packet_free(&pkt);
return true;
}

音频编码：

包含了音频转码，混音，编码等，由于本人不太喜欢ffmpeg的AAC编码必须输入为AV_SAMPLE_FMT_FLTP，我直接用了libfaac库进行编码，输入为S16。ACM比较简单，网上也很好搜到，我贴ffmpeg的音频转码封装吧。对于AAC编码，libfaac也比较简单，就不贴代码了。

转码代码：

#pragma once
#include "stdafx.h"
class CAudioACM
{
private:char *audio_out_buffer=0;SwrContext* audio_convert_ctx = 0;int m_nInSampleRate;int m_nInChannel;int m_nOutSampleRate;int m_nOutChannel;AVSampleFormat in_sample_fmt;AVSampleFormat out_sample_fmt;int Getbytes_per_sample(int sampleFormat){int bytes_per_sample = 2;switch (sampleFormat){case AV_SAMPLE_FMT_U8P:case AV_SAMPLE_FMT_U8:bytes_per_sample = 8 >> 3;break;case AV_SAMPLE_FMT_S16P:case AV_SAMPLE_FMT_S16:bytes_per_sample = 16 >> 3;break;case AV_SAMPLE_FMT_S32:case AV_SAMPLE_FMT_S32P:case AV_SAMPLE_FMT_FLT:case AV_SAMPLE_FMT_FLTP:bytes_per_sample = 32 >> 3;break;case AV_SAMPLE_FMT_DBL:case AV_SAMPLE_FMT_DBLP:bytes_per_sample = 64 >> 3;break;default:bytes_per_sample = 0;break;}return bytes_per_sample;}public:void Init(int nInSampleRate, int nInChannel,int nInFMT, int nOutSampleRate, int nOutChannel, int nOutFMT)//nFMT:0S16  1FLTP{if (audio_out_buffer){delete[]audio_out_buffer;audio_out_buffer = 0;}audio_out_buffer = new char[1024 * 1024];m_nInSampleRate = nInSampleRate;m_nInChannel = nInChannel;m_nOutSampleRate = nOutSampleRate;m_nOutChannel = nOutChannel;in_sample_fmt = nInFMT==0?AV_SAMPLE_FMT_S16: AV_SAMPLE_FMT_FLTP;out_sample_fmt = nOutFMT == 0 ? 
AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLTP;audio_convert_ctx = swr_alloc_set_opts(0, av_get_default_channel_layout(nOutChannel), out_sample_fmt, nOutSampleRate,av_get_default_channel_layout(nInChannel), in_sample_fmt, nInSampleRate, 0, NULL);//配置源音频参数和目标音频参数  int n1 = av_get_default_channel_layout(nOutChannel);int n4 = av_get_default_channel_layout(nInChannel);swr_init(audio_convert_ctx);}void InitFrame(AVFrame ** srcFrame,char* pPCM,int nSize,int samplerate,int channel,int format= AV_SAMPLE_FMT_S16){*srcFrame = av_frame_alloc();(*srcFrame)->nb_samples = nSize/ channel/2;(*srcFrame)->channels = channel;(*srcFrame)->channel_layout = av_get_default_channel_layout(channel);(*srcFrame)->format = format;(*srcFrame)->sample_rate = samplerate;(*srcFrame)->data[0] = (*srcFrame)->extended_data[0] = (uint8_t*)pPCM;avcodec_fill_audio_frame(*srcFrame, channel, (AVSampleFormat)format, (const uint8_t*)pPCM, nSize, 0);}int Resample(char *pIn,int nLen, AVFrame* pOutFrame){AVFrame* srcFrame = 0;InitFrame(&srcFrame, pIn, nLen, m_nInSampleRate, m_nInChannel);Resample(srcFrame, pOutFrame);av_frame_free(&srcFrame);return 0;}int Resample(AVFrame * pFrame_A, AVFrame* pOutFrame){if (!audio_convert_ctx)return 0;int bytes_per_sample = Getbytes_per_sample(out_sample_fmt);int in_samples_per_channel = pFrame_A->nb_samples;if (pFrame_A->sample_rate < 8000)pFrame_A->sample_rate = 8000;int out_samples_per_channel = av_rescale_rnd(128 + in_samples_per_channel, m_nOutSampleRate, pFrame_A->sample_rate, AV_ROUND_UP);int size_per_sample_with_channels = m_nOutChannel * bytes_per_sample;int out_size = out_samples_per_channel * size_per_sample_with_channels;unsigned char* out[] = { (unsigned char*)audio_out_buffer };//int converted_samplers_per_channel = swr_convert(audio_convert_ctx, out, out_samples_per_channel, (const uint8_t**)pFrame_A->extended_data, in_samples_per_channel);int converted_samplers_per_channel = swr_convert(audio_convert_ctx, out, in_samples_per_channel, (const 
uint8_t**)pFrame_A->extended_data, in_samples_per_channel);if (converted_samplers_per_channel > 0){pOutFrame->nb_samples = converted_samplers_per_channel;pOutFrame->sample_rate = m_nOutSampleRate;pOutFrame->channels = m_nOutChannel;pOutFrame->extended_data[0]=pOutFrame->data[0]= out[0];
//			pOutFrame->linesize[0] = pFrame_A->linesize[0];//m_pcmfifo.push((BYTE*)audio_out_buffer, converted_samplers_per_channel * size_per_sample_with_channels);}return 0;}void close(){if (audio_out_buffer){delete[]audio_out_buffer;audio_out_buffer = 0;}if (audio_convert_ctx){swr_close(audio_convert_ctx);	//xjb2018swr_free(&audio_convert_ctx);}}public:CAudioACM(){audio_convert_ctx = 0;m_nInSampleRate = 44100;m_nInChannel = 2;m_nOutSampleRate = 44100;m_nOutChannel = 2;}~CAudioACM(){close();}};

再贴一个简单的混音代码，因为要同时录制麦克风声音+计算机声音，就需要同时转换为44100，双声道，16位，然后进行混音，算法也比较简单，就是两个声音相加，我这里减半相加，都差不多，看你自己选择！

// Mixes two buffers of 16-bit PCM samples (low byte first) into pBufOut.
//
// Each input sample is halved before the two are added; the sum is then
// limited to [-32767, 32767] and stored low byte first.
//
// @param pBuf0    first input buffer, at least nLen bytes
// @param pBuf1    second input buffer, at least nLen bytes
// @param pBufOut  output buffer, at least nLen bytes
// @param nLen     length in BYTES (not samples). Only whole samples are
//                 processed: when nLen is odd the trailing byte is ignored
//                 and the matching output byte is left untouched.
static void WaveMix(BYTE *pBuf0,BYTE *pBuf1,BYTE *pBufOut,int nLen)
{
    // "i + 1 < nLen" keeps both bytes of every sample inside the buffers;
    // stepping while "i < nLen" ran one byte past the end for odd lengths.
    for (int i = 0; i + 1 < nLen; i += 2)
    {
        const short s0 = static_cast<short>(
            static_cast<unsigned short>(pBuf0[i] | (pBuf0[i + 1] << 8)));
        const short s1 = static_cast<short>(
            static_cast<unsigned short>(pBuf1[i] | (pBuf1[i + 1] << 8)));

        int mixed = s0 / 2 + s1 / 2;
        if (mixed > 32767)
            mixed = 32767;
        if (mixed < -32767)
            mixed = -32767;

        const unsigned short packed = static_cast<unsigned short>(mixed);
        pBufOut[i] = static_cast<BYTE>(packed & 0xFF);
        pBufOut[i + 1] = static_cast<BYTE>(packed >> 8);
    }
}

录像：

前面已经得到了AAC和H264的数据了，我们就可以mux（封装）成MP4文件了，合成按照ffmpeg例子的步骤来，我贴音视频同步的代码吧。先说说音视频同步，其实很简单，但很多人搞不懂。我们现实中用的是时分秒的计时单位，程序里常用的是毫秒（ms）。音视频时间戳也是同样的道理，只不过单位并不是ms，而是由AVRational time_base决定。例如 pAVCodecContext->time_base.num = 1; pAVCodecContext->time_base.den = 90000; 这种常见的写法，表示时间戳的单位不是1ms，而是1/90000秒。

音频的时间戳单位：time_base.den = 44100；

视频的时间戳单位：time_base.den = 90000；

上面的时间我们给她个名字吧，叫音视频编码器时间，而下面说的是ms时间。我们要做的事情就是把ms时间转成音视频编码器时间。

音视频同步：一般我是以音频为参考，因为音频不能丢，可以想象，音频丢一帧比视频丢一帧影响大得多，视频多一帧少一帧是看不出来的。我们认为音频硬件是可以信任的计时基准，在44100采样率下，读取1024个sample约为23ms。视频去同步音频时，视频可以用帧率去计算时间，公式是这样：__int64 vtime = m_nVideoCount * 1000/m_fps;但是有个问题，你能保证编码帧率始终是恒定的30帧吗？显然不可能，所以我们改用系统时间去计算，公式就变成这样：__int64 vtime = (av_gettime() - m_nStartVideoTime) / 1000;（m_nStartVideoTime是开始编码的时刻；av_gettime()的单位是微秒，除以1000得到毫秒）。

音频ms时间的代码：

// Returns the audio position in milliseconds, derived from the number of
// packets written so far: m_nAudioCount packets of 1024 samples each at
// m_samplerate samples per second. The value is computed in double precision
// and truncated when converted to the integer return type.
// Returns 0 while m_samplerate is not positive.
__int64 COutPutStream::GetAudioTime()
{
    if (!(m_samplerate > 0))
        return 0;

    const double totalSamples = (double)(m_nAudioCount) * (double)1024;
    return totalSamples * (double)1000 / (double)m_samplerate;
}

我们演算下：假设采样率为44100，输入m_nAudioCount=1时，1024×1000/44100≈23.22；由于返回值类型是__int64，小数部分被截断，实际返回的是23（ms），是吧！

视频ms时间的代码：

// Returns the video position in milliseconds, measured on the wall clock:
// the time elapsed between m_nStartVideoTime and now (av_gettime()), divided
// by 1000. Returns 0 while m_nStartVideoTime has not been set to a positive
// value.
// A frame-count based alternative would be m_nVideoCount * 1000 / m_fps.
__int64 COutPutStream::GetVideoTime()
{
    if (!(m_nStartVideoTime > 0))
        return 0;

    const __int64 elapsedMs = (av_gettime() - m_nStartVideoTime) / 1000;
    return elapsedMs;
}
这就好理解了，时间戳那里直接填：

视频时间戳：

    int  lTimeStamp=this->GetVideoTime()*1000;enc_pkt.stream_index = pAVStream_Video->index;enc_pkt.dts = enc_pkt.pts = (INT64)90000 * (lTimeStamp) / AV_TIME_BASE;//转换为视频编码器时间AVRational timebase;timebase.den = 90000;    timebase.num = 1;av_packet_rescale_ts(&enc_pkt, timebase, pAVStream_Video->time_base);//转换为mux时间av_interleaved_write_frame(pOutAVFormatContext, &enc_pkt);

音频时间戳：

	pkt.size = nAACSize;pkt.pts = pkt.dts = 1024 * m_nAudioCount;//转为音频编码时间（看得懂吗？多理解下，呵呵）AVRational timebase;timebase.den = m_samplerate;//xiaotimebase.num = 1;av_packet_rescale_ts(&pkt, timebase, pAVStream_Audio->time_base);//转为mux时间m_nAudioCount++;...av_interleaved_write_frame(pOutAVFormatContext, &pkt);

这样就高枕无忧了？理论上是，但实际上音频和视频之间仍可能出现偏差，这时就要进行调节。我是这样做的：在H264视频写入的代码段内，如果音频时间-视频时间>100ms，就执行m_nStartVideoTime-=(5*1000)，让视频时间前进5ms；反之如果视频时间-音频时间>100ms，就执行m_nStartVideoTime+=(5*1000)，让视频时间后退5ms。代码实现：（如果你有更好的方法请告诉我！）

	if (audioMS - dbMS > 100 || audioMS - dbMS < -100)//强制同步{if(audioMS - dbMS > 100){m_nStartVideoTime-=(5*1000);//10ms}else{m_nStartVideoTime+=(5*1000);//10ms}}

这样就可以高枕无忧了，万事大吉，今晚吃鸡！

补充下为什么要双录（同时录成mp4和flv），这一切都是为了保险！因为mp4并不是为流媒体而设计的，它的音视频重要信息都存储在文件结尾，而且要到停止录像时才写入，所以中途一旦断电或者程序崩溃，mp4就无法播放。而flv就不同了，它是为流媒体而设计的，即使随时中断、崩溃，也不会影响已录制内容的播放。

本文DEMO源码下载：

https://download.csdn.net/download/xjb2006/85109025

dxgi桌面屏幕录像（windows屏幕录像，硬件编码，声音捕获，音视频同步）

QQ35744025 萧萧工作室