How do I use the FFmpeg libraries to extract every nth frame from a video and save it as a small image file in C++?
After experimenting with the examples in the FFmpeg documentation, I was finally able to create a short program that extracts every nth frame from a video. However, the output files that it produces are huge, at over 15 MB for each image. How can I change this to produce lower-quality images?
The result I am trying to get is done easily on the command line with:
ffmpeg -i [input video] -vf "select=not(mod(n\,10))" -fps_mode vfr img_%03d.jpg
For a video with about 500 frames, this creates 50 images that are only about 800 KB each; how would I be able to mimic this in my program?
My code consists of opening the input file, decoding the packets, then saving the frames:
#include <cstdio>
#include <cstdlib>
#include <iostream>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
// Demuxer context for the input file; filled in by open_input_file() and closed in main().
static AVFormatContext *fmt_ctx;
// Decoder context for the selected video stream; allocated by open_input_file() and freed in main().
static AVCodecContext *dec_ctx;
// Index of the video stream chosen by av_find_best_stream(); -1 until open_input_file() sets it.
static int video_stream_index = -1;
// Open `filename`, pick its best video stream and open a decoder for it.
//
// On success the file-scope globals fmt_ctx, dec_ctx and video_stream_index
// are populated and 0 is returned. On failure an error line is printed and a
// negative AVERROR code is returned; whatever was already stored in the
// globals is left for the caller to release.
static int open_input_file(const char *filename) {
    int ret;
    const AVCodec *dec = NULL;

    // Open the container.
    if((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
        printf("ERROR: failed to open input file\n");
        return ret;
    }

    // Read stream information from the container.
    if((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
        printf("ERROR: failed to find stream information\n");
        return ret;
    }

    // Select the best video stream; the matching decoder is returned in `dec`.
    ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
    if(ret < 0) {
        printf("ERROR: failed to find a video stream in the input file\n");
        return ret;
    }
    video_stream_index = ret;

    // Allocate the decoding context for that decoder.
    dec_ctx = avcodec_alloc_context3(dec);
    if(!dec_ctx) {
        printf("ERROR: failed to allocate decoding context\n");
        return AVERROR(ENOMEM);
    }

    // Copy the stream's codec parameters into the decoder context. A failure
    // here must stop the setup: opening the decoder without them would
    // run it with the wrong configuration.
    ret = avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
    if(ret < 0) {
        printf("ERROR: failed to copy codec parameters to decoding context\n");
        return ret;
    }

    // Initialise the video decoder.
    if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
        printf("ERROR: failed to open video decoder\n");
        return ret;
    }
    return 0;
}
// Write a packed 24-bit RGB image to `file_name` as a binary PPM ("P6") file.
//
//   buf       first byte of the top row of pixel data
//   wrap      distance in bytes between the starts of consecutive rows
//   x_size    image width in pixels (x_size * 3 bytes are written per row)
//   y_size    image height in pixels
//   file_name path of the file to create or overwrite
//
// Nothing is returned. If the file cannot be opened, or a write or the final
// close fails, an error line is printed and the function returns.
static void save(unsigned char *buf, int wrap, int x_size, int y_size, const char *file_name) {
    FILE *file = fopen(file_name, "wb");
    if(!file) {
        // Without this check an unwritable path would hand NULL to fprintf().
        printf("ERROR: failed to open output file %s\n", file_name);
        return;
    }

    // PPM header: magic number, width, height, maximum channel value.
    fprintf(file, "P6\n%d %d\n%d\n", x_size, y_size, 255);

    // Rows may be padded (wrap >= x_size * 3), so copy one row at a time.
    const size_t row_bytes = (size_t)x_size * 3;
    int write_failed = 0;
    for(int i = 0; i < y_size; i++) {
        if(fwrite(buf + (size_t)i * wrap, 1, row_bytes, file) != row_bytes) {
            write_failed = 1;
            break;
        }
    }

    // fclose() flushes buffered data, so its result matters for a written file.
    if(fclose(file) != 0) {
        write_failed = 1;
    }
    if(write_failed) {
        printf("ERROR: failed to write output file %s\n", file_name);
    }
}
// Send one packet to the decoder and save every `mod`-th decoded frame as an
// RGB image named "<out_file_name>_<frame number>.<file_ext>".
//
//   cxt           opened decoder context
//   frame         scratch frame that receives each decoded picture
//   pkt           packet to decode (passed straight to avcodec_send_packet)
//   out_file_name output path prefix
//   file_ext      output file extension, without the dot
//   mod           keep frames whose cxt->frame_number is a multiple of this;
//                 values below 1 are treated as 1
//
// Decoder and allocation errors are fatal: a message is printed and the
// process exits with status 1.
static void decode(AVCodecContext *cxt, AVFrame *frame, AVPacket *pkt,
const char *out_file_name, const char *file_ext, int mod=1) {
    char buf[1024];
    int ret;
    // Conversion resources are created on first use and released before
    // returning, so packets that yield no frame to save cost nothing and
    // nothing is leaked across calls.
    SwsContext *sws_ctx = NULL;
    AVFrame *rgb_frame = NULL;

    // Guard the modulo below against division by zero.
    if(mod < 1) {
        mod = 1;
    }

    // Send the packet to the decoder.
    ret = avcodec_send_packet(cxt, pkt);
    if(ret < 0) {
        printf("ERROR: error sending packet for decoding\n");
        exit(1);
    }

    while(ret >= 0) {
        // Fetch the next decoded frame, if any.
        ret = avcodec_receive_frame(cxt, frame);
        if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // Decoder needs more input or is fully drained.
            break;
        } else if(ret < 0) {
            printf("ERROR: error during decoding\n");
            exit(1);
        }

        // Skip frames that are not the (mod)th frame.
        if(cxt->frame_number % mod != 0) {
            continue;
        }

        printf("saving frame %03d\n", cxt->frame_number);
        fflush(stdout);

        if(!sws_ctx) {
            // Same-size conversion from the decoder's pixel format to RGB24.
            sws_ctx = sws_getContext(cxt->width, cxt->height, cxt->pix_fmt, cxt->width,
                                     cxt->height, AV_PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
            if(!sws_ctx) {
                printf("ERROR: failed to create scaling context\n");
                exit(1);
            }
        }
        if(!rgb_frame) {
            rgb_frame = av_frame_alloc();
            if(!rgb_frame) {
                printf("ERROR: failed to allocate RGB frame\n");
                exit(1);
            }
            rgb_frame->format = AV_PIX_FMT_RGB24;
            rgb_frame->width = cxt->width;
            rgb_frame->height = cxt->height;
            if(av_frame_get_buffer(rgb_frame, 0) < 0) {
                printf("ERROR: failed to allocate RGB frame buffer\n");
                exit(1);
            }
        }

        // Convert the decoded frame into the RGB frame.
        sws_scale(sws_ctx, frame->data, frame->linesize, 0, frame->height,
                  rgb_frame->data, rgb_frame->linesize);

        // Build the output path and write the image.
        snprintf(buf, sizeof(buf), "%s_%03d.%s", out_file_name, cxt->frame_number, file_ext);
        save(rgb_frame->data[0], rgb_frame->linesize[0], rgb_frame->width, rgb_frame->height, buf);
    }

    // Both calls accept the "never allocated" case.
    av_frame_free(&rgb_frame);
    sws_freeContext(sws_ctx);
}
int main() {
// SIMULATE COMMAND LINE ARGUMENTS
char argv0[] = "test";
char argv1[] = "/User/Desktop/frames/test_video.mov";
char *argv[] = {argv0, argv1, nullptr};
// INIT VARS AND FFMPEG OBJECTS
int ret;
AVPacket *packet;
AVFrame *frame;
// ALLOCATE FRAME AND PACKET
frame = av_frame_alloc();
packet = av_packet_alloc();
if (!frame || !packet) {
fprintf(stderr, "Could not allocate frame or packet\n");
exit(1);
}
// IF FILE DOESN'T OPEN, GO TO THE END
if((ret = open_input_file(argv[1])) < 0) {
goto end;
}
// READ ALL THE PACKETS - simple
while(av_read_frame(fmt_ctx, packet) >= 0) {
// IF PACKET INDEX MATCHES VIDEO INDEX...
if (packet->stream_index == video_stream_index) {
// SEND PACKET TO THE DECODER and SAVE
std::string name = "/User/Desktop/frames/img";
std::string ext = "bmp";
decode(dec_ctx, frame, packet, name.c_str(), ext.c_str(), 5);
}
// UNREFERENCE THE PACKET
av_packet_unref(packet);
}
// END MARKER
end:
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_packet_free(&packet);
// FINAL ERROR CATCH
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
exit(1);
}
exit(0);
}
I am not sure how to go about producing images that are much smaller in size like the ones produced on the command line. I have a feeling that this is possible somehow during the conversion to RGB or the saving of the file but I can’t seem to figure out how.
Also, is there any way that I could go about this much more efficiently? On the command line, this finishes very quickly (no more than a second or two for a 9 sec. movie at ~60 fps).
Read more here: Source link