Audio driven image processing

Pixilang programming language
ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Audio driven image processing

Post by ainegil »

I started to experiment with audio-driven image processing.

Here is an example; a demo video will follow later.

It loads an audio file and an image,
analyzes the audio via FFT,
and converts the FFT to the ERB scale (5 bands here).

This is mapped onto a 256-entry lookup table with values 0..1.

These values are then used as a lookup, indexed by the brightness (greyscale) of the pixels
in the image.
Here the image is converted to greyscale and added onto the color version
with the "overlay" blending mode (or "hard light", "multiply", "screen", etc.).

The brightness in the greyscale is used to determine the blend amount.

It does not run in real time.

A video will follow; maybe someone has more ideas or improvements.
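
To make the idea concrete before the full script, here is a minimal sketch of the core step (my own distillation; the names lightbuf, img_col, img_grey and img_out are placeholders, not the exact variables used below). The greyscale value of a pixel indexes a 256-entry table built from the spectrum, and that value becomes the blend amount:

Code: Select all

// hypothetical sketch: lightbuf is a 256-entry FLOAT container with values 0..1,
// rebuilt from the ERB spectrum every frame; img_col is the color image,
// img_grey its greyscale version, img_out the result.
$i = 0 while $i < get_size( img_col )
{
    $gr = ( get_red( img_grey[ $i ] ) + get_green( img_grey[ $i ] ) + get_blue( img_grey[ $i ] ) ) div 3
    // blend amount 0..255 driven by the spectrum at this brightness:
    img_out[ $i ] = get_blend( img_col[ $i ], img_grey[ $i ], 255 * lightbuf[ $gr ] )
    $i + 1
}

The full script: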

Code: Select all

set_pixel_size( WINDOW_XSIZE / 480 )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )


//Input Image
img = load( "images/test_5b.jpg" )

//Input WAV file:
filename = "audio.wav"

//Output MJPEG AVI video file:
avi_filename = "test 2.avi"

//Graphics options:

showfps = 1;
xsize = 1024
ysize = 600
if avi_filename != 0
{
//xsize = 1280 
//ysize = 720 
}
fps = 25

//_---------------------------------


if xsize == 0 { ss = 600 } else { ss = xsize }
set_pixel_size( WINDOW_XSIZE / ss )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )

scr = get_screen()
if xsize != 0 && ysize != 0 { resize( scr, xsize, ysize ) }
xsize = get_xsize( scr )
ysize = get_ysize( scr )
hxsize = xsize / 2
hysize = ysize / 2

if fps == 0 { fps = 25 }


wav = load( filename )
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
sample_rate_scale = 1
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

include "../../../lib/mjpeg.pixi"

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------

gfx_init()

start_timer( 0 )


while( 1 )
{
    
    transp( 256 )
    clear();
 
    // analyse audio ######################################
    // FFT
	clean(ftbufi)
    	clean(ftbufr)
    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	$v = wav[ $p + $i * wav_channels ] / wav_amp_max	
	vals[ $i ] = $v 
	$winlen = 2* wav.sample_rate/fps 
	// half sine window
	//if $i <=  $winlen{$win = sin(M_PI*$i/$winlen)} else{$win =0 }
	//or cosine window
	if $i <=  $winlen{$win = cos(2*M_PI*$i/$winlen)*0.5 + 0.5} else{$win =0 }
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = $v*$win
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)

	// get magnitude 
	$i = 0 while $i < hftsize
	{ 		
	ftmag[ $i ] = sqrt(ftbufr[$i]*ftbufr[$i]+ftbufi[$i]*ftbufi[$i])	
	if ftmag[ $i ] >= ftmagsmoo[ $i ] {
		ftmagsmoo[ $i ] = ftmag[$i]	
	}else{ ftmagsmoo[ $i ] = ftmagsmoo[ $i ] * 0.92 } 

	$i + 1
	}
	
	// map on ERB scale 
	$size = erbsize
	$ERBmin = 1
	$ERBrange = 38
	//clean(erbbuf)
	$i = 0 while $i < $size
	{
	//$warpf = i*wav.sample_rate/ftsize	
	//$warperb = 21.4 * log10(1 + 0.00437 * $warpf)
	//$warpi1 = ($warperb *xsize/60 +0.5)
	//map erb range-min, min erb:
	$erb = $ERBrange*$i/$size + $ERBmin
	$erbf = ($erb) * ( 1/21.4)
	$erbf = pow(10,$erbf)
	$erbf = $erbf - 1
	$erbf = $erbf * (1/ 0.00437) 
	$erbift1 = $erbf/(wav.sample_rate/ftsize)
	if $erbift1 < 0 {$erbift1 =0}
	$erbf = ($erb + $ERBrange/$size) * ( 1/21.4)
	$erbf = pow(10,$erbf)
	$erbf = $erbf - 1
	$erbf = $erbf * (1/ 0.00437)
	$erbift2 = $erbf/(wav.sample_rate/ftsize)
	if $erbift2 < 0 {$erbift2 = 0}
	erbbuf[$i]=0
	$j = 0 while $j < $erbift2 - $erbift1
		{
		erbbuf[$i] = erbbuf[$i] + ftmagsmoo[$erbift1 +$j]
		$j = $j +1
		}
	// convert to dB
	erbbuf[$i] = atodb(erbbuf[$i])

	//get average
	if $i == 0 { $dbav = erbbuf[$i] }else{ $dbav = $dbav + erbbuf[$i] }
	$i +1
	}
	$dbav = $dbav / $size
	//------------------------------------------
	
	// create light correction map
	$size = 256
	//$dbmult = 2
	//$dblift = 6
	$i = 0 while $i < $size
	{ 
		
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	$erbi = $erbi div 1
	$v = dbtoa(erbbuf[$erbi])  
	$v2 = dbtoa(erbbuf[$erbi +1])
	$v = $v + ($v2 - $v) * $frac
	//lightbuf[$i] = ($v- $dbav)/$dbmult 
	lightbuf[$i] = $v * 32//( $size / erbsize)
	
	if lightbuf[$i] > 1 { lightbuf[$i] = 1}
	if lightbuf[$i] < 0 { lightbuf[$i] = 0}
	
	if lightbuf[$i] > lightbufsmoo[$i] {
		lightbufsmoo[$i] = lightbuf[$i]
	}else{ lightbufsmoo[$i] = lightbufsmoo[$i]*0.92}
	

	$i + 1
	}
	
	
// GRAPHIC


    
    //convert img2c to grey and relight
    $offsetmax = 0 // offset not used here
    $i = 0 while $i < get_size(img2b){
    // get greyscale
    	$gr = get_red(img2c[$i]) +  get_green(img2c[$i]) + get_blue(img2c[$i])
    	$gr = $gr div 3
    	$cgr = get_color( $gr,$gr,$gr)
    	
	$idxnew = ( $i + lightbufsmoo[$gr] * $offsetmax ) % get_size(img2a)
	
	// hard light with greyscale:
	/*
    	if $gr < 127{
    	$gr_m = 2 * $gr
    	$pixelr = get_red(img2a[$idxnew])*$gr_m div 255
    	$pixelg = get_green(img2a[$idxnew])*$gr_m div 255
    	$pixelb = get_blue(img2a[$idxnew])*$gr_m div 255
    	} else {
    	$gr_m = 255 - $gr
    	$pixelr = 255 - 2*(255-get_red(img2a[$idxnew]))*$gr_m div 255
    	$pixelg = 255 - 2*(255-get_green(img2a[$idxnew]))*$gr_m div 255
    	$pixelb = 255 - 2*(255-get_blue(img2a[$idxnew]))*$gr_m div 255
    	}
    	$pixelrgb = get_color($pixelr,$pixelg,$pixelb)
    	*/
    	
    	// overlay with greyscale:
    	if $gr < 127{
    	$gr_m = $gr
    	$pixelr = 2*get_red(img2a[$idxnew])*$gr_m div 255
    	$pixelg = 2*get_green(img2a[$idxnew])*$gr_m div 255
    	$pixelb = 2*get_blue(img2a[$idxnew])*$gr_m div 255
    	} else {
    	$gr_m = 255 - $gr
    	$pixelr = 255 - 2*(255-get_red(img2a[$idxnew]))*$gr_m div 255
    	$pixelg = 255 - 2*(255-get_green(img2a[$idxnew]))*$gr_m div 255
    	$pixelb = 255 - 2*(255-get_blue(img2a[$idxnew]))*$gr_m div 255
    	}
    	$pixelrgb = get_color($pixelr,$pixelg,$pixelb)
   
    	
    	// screen light with greyscale
    	/*
    	$gr_m = 255 - $gr
    	$pixelr = 255 - (255-get_red(img2a[$idxnew]))*$gr_m div 255 % 255
    	$pixelg = 255 - (255-get_green(img2a[$idxnew]))*$gr_m div 255 % 255
    	$pixelb = 255 - (255-get_blue(img2a[$idxnew]))*$gr_m div 255 % 255
    	$pixelrgb = get_color($pixelr,$pixelg,$pixelb)
    	*/
    	// mix hard light according to ERB scale spectrum 
    	
    	img2b[$idxnew] = get_blend(img2a[$i],$pixelrgb,255*lightbufsmoo[$gr])
    	
    	$i +1
    }
    
    //pixi( img2b, 0, 0)
    pixi( img2b, 0, 0,WHITE,2,2 )
	

// FRAME########################################################

if showfps {
    ts = ""
    sprintf( ts, "FPS:%u", FPS )
    print( ts, -get_xsize( get_screen() ) / 2 + 16, -get_ysize( get_screen() ) / 2 + 16, WHITE, TOP | LEFT )
}
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    
    framecounter + 1;     
}

fn gfx_init()
{
    	vals = new( xsize, 1, FLOAT )
    	clean( vals )
	ftsize = 4096//2048
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
	ftmagsmoo = new( hftsize, 1, FLOAT )
	
	// number of bands in ERB scale
	erbsize = 5
	erbbuf = new( erbsize, 1, FLOAT)
	//erbbufsmoo = new( erbsize, 1, FLOAT)
	
	lightbuf = new( 256, 1, FLOAT)
	lightbufsmoo = new( 256, 1, FLOAT)
	
img2a = clone( img )
img2b = clone( img )
img2c = clone( img )
	//copy( img2a, img )
    	//copy( img2b, img )
    	//copy( img2c, img )
//resize to blur 
//resize(img2c, get_xsize(img)/7,get_ysize(img)/7,-1, RESIZE_COLOR_INTERP2)
//resize(img2c, get_xsize(img),get_ysize(img),-1, RESIZE_COLOR_INTERP2)
set_flags( img2b, RESIZE_COLOR_INTERP2)
	
	framecounter = 0;
}

fn atodb($a){
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
	$a = pow(10,($db)*0.05)
ret($a)
}
fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}

ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

Here is a test:



It's not spectacular, and a similar effect could be achieved much more simply,
but this way it is more flexible.
It can look more interesting depending on the music, the image,
and tweaking of parameters.

I think it's useful.
It renders only at about 4 fps on a Raspi 400 though, with the image at half the size of what is shown.

The questions are:
how can we make it faster with the fast container operations?
what other interesting effects could be achieved in a similar way?
and how do you apply this to a stream of images, without loading the complete animation?

I have to look into these things, but maybe someone else has ideas or questions. A sketch for the last question follows below.
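
For the last question, a minimal, untested sketch: decode the video frame by frame with the ffmpeg_video_import helpers from the Pixilang examples lib, so the whole animation never has to be in memory at once (include path, file name, size and frame count here are placeholders):

Code: Select all

include "../../lib/ffmpeg_video_import.pixi"

frame_buf = new( 640, 360 ) // one frame at a time
vi = ffmpeg_video_import_open( "video.mp4", 640, 360, 0, 250 ) // file, width, height, first frame, max frames
f = 0 while f < 250
{
    ffmpeg_video_import_read( vi, frame_buf ) // decode the next frame into frame_buf
    // ...run the audio-driven processing on frame_buf here...
    pixi( frame_buf )
    frame( 1000 / 25 )
    f + 1
}
ffmpeg_video_import_close( vi )
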
ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

Better and shorter code, with lots of comments and options for experimentation.

It does overlay with greyscale and a pixel offset from the audio.

It's very slow, but useful.
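
A minimal sketch of the audio-driven pixel offset idea (my own distillation; names here are placeholders, and the fixed one-row offset in the script below could be replaced by this):

Code: Select all

// hypothetical sketch, inside the per-pixel loop: $i is the pixel index,
// $gr the pixel's greyscale value, lookup a 256-entry table from the spectrum.
$offset = lookup[ $gr ] * offsetmax   // offset in rows, can be fractional
$idx = $offset div 1                  // integer part
$frac = $offset - $idx                // fractional part
$i2 = $i + get_xsize( img ) * $idx
$i3 = $i2 + get_xsize( img )
$c = get_blend( img[ $i2 ], img[ $i3 ], 255 * $frac ) // interpolate between the two rows

The full script: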

Code: Select all

set_pixel_size( WINDOW_XSIZE / 480 )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )


//Input Image
img = load( "images/test_.jpg" )

//Input WAV file:
filename = "audio.wav"
//
preamp = 1 // amplify audio for analysis, default 1 (unchanged) 

//Output MJPEG AVI video file:
avi_filename = "test.avi"

//Graphics options:
showfps = 1;
xsize = 1024
ysize = 600
if avi_filename != 0
{
//xsize = 1280 
//ysize = 720 
}
fps = 20

//PREPARE---------------------------------


if xsize == 0 { ss = 600 } else { ss = xsize }
set_pixel_size( WINDOW_XSIZE / ss )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )

scr = get_screen()
if xsize != 0 && ysize != 0 { resize( scr, xsize, ysize ) }
xsize = get_xsize( scr )
ysize = get_ysize( scr )
hxsize = xsize / 2
hysize = ysize / 2

if fps == 0 { fps = 25 }


wav = load( filename )
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
sample_rate_scale = 1
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

include "../../../lib/mjpeg.pixi"

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------

gfx_init()

start_timer( 0 )

// MAIN LOOP ---------------------------------------------------
while( 1 )
{
    
// analyse audio ######################################
    // FFT
	clean(ftbufi)
    	clean(ftbufr)

    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	$v = wav[ $p + $i * wav_channels ] / wav_amp_max	
	vals[ $i ] = $v * preamp
	$winlen = 2* wav.sample_rate/fps 
	// half sine window
	//if $i <=  $winlen{$win = sin(M_PI*$i/$winlen)} else{$win =0 }
	//or cosine window
	if $i <=  $winlen{$win = cos(2*M_PI*$i/$winlen)*0.5 + 0.5} else{$win =0 }
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = $v*$win
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)
	
	// map on ERB scale 
	$size = erbsize
	$ERBmin = 2
	$ERBrange = 38
	//clean(erbbuf)
	$i = 0 while $i < $size
	{
	$erb = $ERBrange*$i/($size) + $ERBmin
	$erbf = ($erb) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	$erbift1 = $erbf * ftbinfrez
	$erbf = ($erb + $ERBrange/($size)) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	$erbift2 = $erbf * ftbinfrez
	erbbuf[$i]=0
	$j = 0 while $j < $erbift2 - $erbift1
		{
		$ii = $erbift1 +$j
			
		erbbuf[$i] = erbbuf[$i] + sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
		$j = $j +1
		}

	$i +1
	}
	//------------------------------------------
	
	// create light correction lookup map
	$size = 256

	$i = 0 while $i < $size 
	{ 	
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	
	$erbi = $erbi div 1
	$v = erbbuf[$erbi] 
	if $erbi < erbsize -1 {
	$v2 = erbbuf[$erbi +1] 	
	$v = $v + ($v2 - $v) * $frac}

	lmap8[$i] = $v * erbsize //32
	
	if lmap8[$i] > 1 { lmap8[$i] = 1}
	if lmap8[$i] < 0 { lmap8[$i] = 0}
	
	if lmap8[$i] > lmap8smoo[$i] {
		lmap8smoo[$i] = lmap8[$i]
	}else{ lmap8smoo[$i] = lmap8smoo[$i]*0.84}
	
	if lmap8smoo[$i] > 1 { lmap8smoo[$i] = 1}
	if lmap8smoo[$i] < 0 { lmap8smoo[$i] = 0}
	
	$i + 1
	}
	
	
// GRAPHIC###########################################################

    $i = 0 while $i < get_size(img2a){
    	
    	$i2 = $i + get_xsize(img2a)
    	$i2b = $i2 + get_xsize(img2a)
	if $i2 > get_size(img2a)  { $i2 = get_size(img2a) } 
    	
    	//modulator greyscale from original
    	$gr = (get_red(img2a[$i]) +  get_green(img2a[$i]) + get_blue(img2a[$i]))  div 3
    	
    	//target greyscale from canvas
    	//normal grey
    	$gr2 = (get_red(img2b[$i2]) +  get_green(img2b[$i2]) + get_blue(img2b[$i2])) 	
	//more red
	//$gr2 = ((get_red(img2b[$i2])<<1) +  ((get_green(img2b[$i2]) + get_blue(img2b[$i2]))>>1)) 
	//more green
	//$gr2 = ((get_green(img2b[$i2])<<1) +  ((get_blue(img2b[$i2]) + get_red(img2b[$i2]))>>1)) 
	//more blue
	//$gr2 = ((get_blue(img2b[$i2])<<1) +  ((get_green(img2b[$i2]) + get_red(img2b[$i2]))>>1)) 
 		
	$gr2 = $gr2 div 3
	
	//modulator greyscale preprocess
	//solarize
	//$gr = abs(($gr << 1) - 255) $gr = 255 - $gr
	//
	// multiply
	$gr = ($gr*$gr) >> 8 
	//
	//screen
	//$gr = 255 - $gr $gr = 255 -( $gr*$gr >> 8)
	//
	//soft screen 
	//$temp = $gr2
	//$gr = 255 - $gr $gr = 255 -( $gr*$gr >> 8)
	//$gr = ($temp >> 1) + ($gr >> 1)
	//
	
	
	//target greyscale preprocess
	//pre multiply
	//$gr2 = ($gr2*$gr2) >> 8 	
	//
	//pre solarize
	//$gr2 = abs(($gr2 << 1) - 255) $gr2 = 255 - $gr2
	//
	//pre screen
	$gr2 = 255 - $gr2 $gr2 = 255 -( $gr2*$gr2 >> 8)
	//
	//pre soft screen
	//$temp = $gr2
	//$gr2 = 255 - $gr2 $gr2 = 255 -( $gr2*$gr2 >> 8)
	//$gr2 = ($temp >> 1) + ($gr2 >> 1)
	//
	
	
	// screen mode overlay greyscale:
	//==============================
	
	//pre modulate greyscale, symmetric
	//$gr2 = (($gr2 - 127) * lmap8smoo[$gr2] + 127) & 255
	//
	//pre modulate greyscale, asymmetric
	//$gr2 = ( $gr2  * lmap8smoo[$gr2] ) & 255
	//
	
	// modulator greyout levels 
	// none
	//$gr_m =  $gr2 
	//normal
	$gr_m = ( $gr2 >> 1 )+ 64
	//soft
	//$gr_m = ( $gr2 >> 2 )+ 96
	//softer
	//$gr_m = ( $gr2 >> 3 )+ 112
	
	// overlay with shifted greyscale 
	//		
	pxl32[0] = get_red(img2b[$i] )
	pxl32[1] = get_green(img2b[$i] )
	pxl32[2] = get_blue(img2b[$i] )
	if $gr_m < 127 {
    	op_cn(OP_MUL,pxl32,$gr_m)
	op_cn(OP_RSHIFT,pxl32,7)
    	}else{
    	$gr_m = 255 - $gr_m 
    	op_cn(OP_SUB2 ,pxl32,255)
    	op_cn(OP_MUL,pxl32,$gr_m)
    	op_cn(OP_RSHIFT,pxl32,7)
    	op_cn(OP_SUB2 ,pxl32,255)
    	}
	img2b[$i] = get_color(pxl32[0],pxl32[1],pxl32[2])
			
	// blend in original according to lightmap	 
    	img2b[$i] = get_blend(img2a[$i],img2b[$i],254*lmap8smoo[$gr])
    
    	$i +1
    }
    
    // display result
    transp(255)
    pixi( img2b, 0, 0,WHITE,2,2 )

	

// DRAW FRAME########################################################

if showfps {
    ts = ""
    sprintf( ts, "FPS:%u", FPS )
    print( ts, -get_xsize( get_screen() ) / 2 + 16, -get_ysize( get_screen() ) / 2 + 16, WHITE, TOP | LEFT )
}
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    
    framecounter + 1;     
}
//END MAIN LOOP



// FUNCTIONS AND STUFF-------------------------------------------------

fn gfx_init()
{
// initial and global values
    	vals = new( xsize, 1, FLOAT )
    	clean( vals )
	ftsize = 4096
	ftbinf = wav.sample_rate/ftsize
	ftbinfrez = 1 / ftbinf
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
	ftmagsmoo = new( hftsize, 1, FLOAT )
	
	// number of bands in ERB scale
	erbsize = 7 // up to 256
	erbbuf = new( erbsize, 1, FLOAT)
	
	// 256 lookup from spectrum
	lmap8 = new( 256, 1, FLOAT) 
	lmap8smoo = new( 256, 1, FLOAT) // smoothed version
	
	C_GREY = get_color(127,127,127) // neutral grey
	
	pxl32 = new(3, 1, INT32)// working pixel 32 bit
	
img2a = clone( img ) // original image
img2b = clone( img ) // working image canvas

set_flags( img2b, RESIZE_COLOR_INTERP2)
	
	framecounter = 0;
}

fn atodb($a){
// converts amplitude to dB
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
// converts db to linear amplitude
	$a = pow(10,($db)*0.05)
ret($a)
}
fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}


ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

Sorry, I just realized I messed up parts of the file-writing code, which I forgot to copy when I rearranged the script.

I will post a better version soon.
ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »



Slight post-processing was added (a blurred copy and gradient overlay).

Render time is about 2 hours on the Raspberry for 640×360.

I could not get it to work with the op_c functions on the whole container,
so I am using the op_cn functions per pixel, which is maybe a bad idea and does not increase speed, I don't know.

I have to look into these functions again and how they can be used.
I am not sure if it is possible, because only a few of them work on color, but possibly I just tried it the wrong
way. See the code above for how I used them; a rough sketch of a whole-container approach follows below.
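
A rough sketch of what I mean by a whole-container approach (my own untested guess, not taken from the script): split the image into separate channel planes with split_rgb, run op_cc/op_cn on each plane, and merge back.

Code: Select all

// hypothetical sketch: "multiply" each channel of img with itself
// using whole-container operations instead of per-pixel get_red()/get_color().
$xs = get_xsize( img ) $ys = get_ysize( img )
$r = new( $xs, $ys, INT32 )
$g = new( $xs, $ys, INT32 )
$b = new( $xs, $ys, INT32 )
split_rgb( 0, img, $r, $g, $b )                   // image -> channel planes
op_cc( OP_MUL, $r, $r ) op_cn( OP_RSHIFT, $r, 8 ) // r = (r*r) >> 8
op_cc( OP_MUL, $g, $g ) op_cn( OP_RSHIFT, $g, 8 )
op_cc( OP_MUL, $b, $b ) op_cn( OP_RSHIFT, $b, 8 )
split_rgb( 1, img, $r, $g, $b )                   // channel planes -> image
remove( $r ) remove( $g ) remove( $b )
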
ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

A slightly faster, more beautiful, and more concise cleaned-up version.

All options are in the header.

I'll leave it at that though; from the demo views it seems no one is interested anyway.

Code: Select all



// File Options:
//Input Image
img = load( "image.jpg" )

//Input WAV file:
filename = "audio.wav"
//
preamp = 2// amplify audio for analysis 

//Output MJPEG AVI video file:
//avi_filename = "test.avi"

//Graphics options:
// Effect:
bands = 9 // number of bands
offsetmax = 3 // dynamic offset in pixels, fractional values possible

// Screen:
showfps = 1 //show fps
showprogress = 1 // show progress bar
scale = 1/2 // scaling for preview/ faster render

xsize = 640 // realtime resolution
ysize = 360
if avi_filename != 0
{
xsize = 1280 /2 // video resolution
ysize = 720 /2
}
fps = 20

//Sound options:
sample_rate_scale = 1

//###########################################################

//PREPARE---------------------------------


if xsize == 0 { ss = 600 } else { ss = xsize }
set_pixel_size( WINDOW_XSIZE / ss )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )

scr = get_screen()
if xsize != 0 && ysize != 0 { resize( scr, xsize, ysize ) }
xsize = get_xsize( scr )
ysize = get_ysize( scr )
hxsize = xsize / 2
hysize = ysize / 2

if fps == 0 { fps = 25 }


wav = load( filename )
/*logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )*/
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

include "../../../lib/mjpeg.pixi"

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------

gfx_init()

start_timer( 0 )

// MAIN LOOP ---------------------------------------------------
while( 1 )
{
    
// analyse audio ######################################
    // FFT
	clean(ftbufi)
    	clean(ftbufr)

    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	if $i <=  winlen{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = wav[ $p + $i * wav_channels ] * preampdivwavmax * wintable[$i]
	}else{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = 0
	}
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)
	
	// map on ERB scale 
	// for color
	$erbgrav = 0
	$erbvmax = 0
	$erbsum = 0
	$i = 0 while $i < erbsize
	{
	erbbuf[$i]=0
	// pass 1 up
	$fromidx = erbiftlookup[$i*2 ]
	$toidx = erbiftlookup[$i*2 + 1 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * ftweight[$ii]		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	// pass 2 down
	$fromidx = erbiftlookup[$i*2 + 1 ]
	$toidx = erbiftlookup[$i*2 + 2 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * (1-ftweight[$ii])		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	$erbsum = $erbsum + erbbuf[$i]
	$erbgrav = $erbgrav + erbbuf[$i] * ($i -erbsize/2)
	$i + 2
	}
	
	if $erbvmax > 1 { $erbvmax = 1 } 
	//$cerbold = C_erb
	C_erb = get_blend(C_erb, hsvrgb( 2880 - $erbgrav * (360/8), $erbsum,0.85 + 0.5 *$erbsum),12)
	//------------------------------------------
	
	// create light lookup map
	$size = 256
	$i = 0 while $i < $size 
	{ 	
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	
	$erbi = $erbi div 1
	$v = erbbuf[$erbi] 
	if $erbi < erbsize -1 {
	$v2 = erbbuf[$erbi +1] 	
	$v = $v + ($v2 - $v) * $frac}
		
	lmap[$i] = (atodb($v) +54 )/54 
	
	if lmap[$i] > lmapsmoo[$i] {
		lmapsmoo[$i] = lmap[$i]
	}else{ lmapsmoo[$i] = lmapsmoo[$i]*0.96}//84}
	
	if lmapsmoo[$i] > 1 { lmapsmoo[$i] = 1}
	if lmapsmoo[$i] < 0 { lmapsmoo[$i] = 0}
	
	$i + 1
	}
	
	
// GRAPHIC###########################################################


    $i = 0 while $i < get_size(img2a){
    	
    	$i2 = $i + (get_xsize(img2a))

	if $i2 > get_size(img2a)  { $i2 = get_size(img2a) } 
    
    	//modulator greyscale from original
    	$grorig = (get_red(img2a[$i]) +  get_green(img2a[$i]) + get_blue(img2a[$i]))  div 3
    	
    	// get offset and fraction 
    	$offset = (lmapsmoo[$grorig]* offsetmax )
    	$idx = $offset div 1
    	$frac = $offset - $idx
    	$i2 = $i + (get_xsize(img2a)) * $idx
    	$i2plus1 = $i2 + (get_xsize(img2a)) 
    	if $i2 > get_size(img2a)  { $i2 = get_size(img2a) } 
    	if $i2plus1 > get_size(img2a)  { $i2plus1 = get_size(img2a) } 
    	//
    	// get colors from offset 
    	$c2 = get_blend(img2b[$i2],img2b[$i2plus1],255*$frac)
    	// get greys from offset
        $gr2 = (get_red($c2) +  get_green($c2) + get_blue($c2) ) div 3
	
	
	//modulator greyscale preprocess
	//solarize
	//$grorig = abs(($grorig << 1) - 255) $grorig = 255 - $grorig
	//
	// multiply
	//$grorig = ($grorig*$grorig) >> 8 
	//
	//screen
	//$grorig = 255 - $grorig $grorig = 255 -( $grorig*$grorig >> 8)
	//
	//soft screen 
	//$temp = $gr2
	//$grorig = 255 - $grorig $grorig = 255 -( $grorig*$grorig >> 8)
	//$grorig = ($temp >> 1) + ($grorig >> 1)
	//	
	
	//target greyscale preprocess
	//pre multiply
	//$gr2 = ($gr2*$gr2) >> 8 	
	//
	//pre solarize
	//$gr2 = abs(($gr2 << 1) - 255) $gr2 = 255 - $gr2
	//
	//pre screen
	$gr2 = 255 - $gr2 $gr2 = 255 -( $gr2*$gr2 >> 8)
	//
	//pre soft screen
	//$temp = $gr2
	//$gr2 = 255 - $gr2 $gr2 = 255 -( $gr2*$gr2 >> 8)
	//$gr2 = ($temp >> 1) + ($gr2 >> 1)
	//
	
	
	// screen mode overlay greyscale:
	//==============================
	
	//pre modulate greyscale, symmetric
	//$gr2 = (($gr2 - 127) * lmapsmoo[$gr2] + 127) & 255
	//
	//pre modulate greyscale, asymmetric
	//$gr2 = ( $gr2  * lmapsmoo[$gr2] ) & 255
	//
	
	// modulator greyout levels 
	// none
	//$grorig_m =  $gr2 
	//normal
	//$grorig_m = ( $gr2 >> 1 )+ 64
	//soft
	$grorig_m = ( $gr2 >> 2 )+ 96
	//softer
	//$grorig_m = ( $gr2 >> 3 )+ 112
	
	// overlay with shifted greyscale 
	/*	
	pxl32[0] = get_red(img2b[$i] )
	pxl32[1] = get_green(img2b[$i] )
	pxl32[2] = get_blue(img2b[$i] )
	if $grorig_m < 127 {
    	op_cn(OP_MUL,pxl32,$grorig_m)
	op_cn(OP_RSHIFT,pxl32,7)
    	}else{
    	$grorig_m = 255 - $grorig_m 
    	op_cn(OP_SUB2 ,pxl32,255)
    	op_cn(OP_MUL,pxl32,$grorig_m)
    	op_cn(OP_RSHIFT,pxl32,7)
    	op_cn(OP_SUB2 ,pxl32,255)
    	}
    	img2b[$i] = get_color(pxl32[0],pxl32[1],pxl32[2])
	*/
	if $grorig_m < 127 {
	$r = (get_red(img2b[$i])*$grorig_m)>>7
    	$g = (get_green(img2b[$i])*$grorig_m)>>7
	$b = (get_blue(img2b[$i])*$grorig_m)>>7
    	}else{
    	$grorig_m = 255 - $grorig_m 
    	$r = 255 - (((255-get_red(img2b[$i]))*$grorig_m)>>7)
    	$g = 255 - (((255-get_green(img2b[$i]))*$grorig_m)>>7)
    	$b = 255 - (((255-get_blue(img2b[$i]))*$grorig_m)>>7)
    	}  	
	img2b[$i] = get_color($r,$g,$b)
			
	// blend in original according to lightmap	 
    	img2b[$i] = get_blend(img2a[$i],img2b[$i],254*lmapsmoo[$grorig])
    
    	$i +1
    }
    
    
    // display result
    transp(255)
    pixi( img2b, 0, 0,WHITE,xsize/get_xsize(img2b),ysize/get_ysize(img2b) )
    
    //pixi( img2b, 0, 0,C_erb,xsize/get_xsize(img2b),ysize/get_ysize(img2b) )

  

// DRAW FRAME########################################################

if showfps {
    ts = ""
    sprintf( ts, "FPS:%u", FPS )
    print( ts, -get_xsize( get_screen() ) / 2 + 16, -get_ysize( get_screen() ) / 2 + 16, WHITE, TOP | LEFT )
}
if showprogress {
	line(-xsize/2, ysize/2 -16, -xsize/2 + xsize * wav_ptr/wav_size, ysize/2 -16, WHITE)
}
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    //while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } }
    framecounter + 1;     
}

gfx_deinit()
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

//END MAIN LOOP



// FUNCTIONS AND STUFF-------------------------------------------------

fn gfx_init()
{
// initial and global values
	preampdivwavmax = preamp / wav_amp_max  	
	ftsize = 4096
	ftbinf = wav.sample_rate/ftsize
	ftbinfrez = 1 / ftbinf
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
 	ftmagsmoo = new( hftsize, 1, FLOAT )

	winlen = 2* wav.sample_rate/fps 
	wintable = new( winlen, 1, FLOAT )
	$i = 0 while $i <= winlen{
	wintable[$i] = cos(2*M_PI*$i/winlen)*0.5 + 0.5
	$i + 1
	}	
	
	// ERB scale
	erbsize = bands * 2
	erbbuf = new( erbsize, 1, FLOAT)
	erbiftlookup = new( erbsize, 1, INT32)
	
	// ERB filter lookup
	//
	$size = erbsize
	$ERBmin = 1
	$ERBrange = 40 -$ERBmin

	$i = 0 while $i < erbsize
	{
	$erb = $ERBrange*$i/(erbsize) + $ERBmin
	$erbf = ($erb) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	erbiftlookup[$i] = ($erbf * ftbinfrez) div 1
	$i +1
	}
	
	// create weights
	ftweight = new( hftsize, 1, FLOAT )
	$i = 0 while $i < erbsize{
		$fromidx = erbiftlookup[$i ]
		$toidx = erbiftlookup[$i + 1 ] -1	
		$d = $toidx - $fromidx 
		$j = 0 while $j < $d{
			$ii = $fromidx +$j
			$w = $j / ($d -1)	
		ftweight[$ii] = $w
			$j = $j +1
		}
 	$i +1
 	}
	
	//----------
	
	// 256 lookup from spectrum
	lmap = new( 256, 1, FLOAT) 
	lmapsmoo = new( 256, 1, FLOAT) // smoothed version
	
	C_GREY = get_color(127,127,127) // neutral grey
	C_erb = WHITE
	
	pxl32 = new(3, 1, INT32)// working pixel 32 bit
		
img2a = clone( img ) // original image
img2b = clone( img ) // working image canvas

if scale !=1 {
resize(img2a, xsize*scale,ysize*scale,-1,RESIZE_COLOR_INTERP2)
resize(img2b, xsize*scale,ysize*scale,-1,RESIZE_COLOR_INTERP2)
}
set_flags( img2a, RESIZE_COLOR_INTERP2)
set_flags( img2b, RESIZE_COLOR_INTERP2)


	
	framecounter = 0;
}


fn atodb($a){
// converts amplitude to dB
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
// converts db to linear amplitude
	$a = pow(10,($db)*0.05)
ret($a)
}

fn hsvrgb($h,$s,$v){
// HSV to RGB
$h = $h % 360
$M = 255*$v
$m = $M*(1- $s)
$z = 255*($v*$s)*(1-abs(mod(($h/60.0),2)-1))
if $h < 60 {
$r = $M
$g = $z + $m
$b = $m
}else{
if $h < 120 {
$r = $z + $m
$g = $M 
$b = $m
}else{
if $h < 180 {
$r = $m 
$g = $M 
$b = $z + $m
}else{
if $h < 240 {
$r = $m 
$g = $z + $m
$b = $M 
}else{
if $h < 300 {
$r = $z + $m
$g = $m 
$b = $M 
}else{
$r = $M 
$g = $m 
$b = $z + $m
}}}}
}
ret(get_color($r ,$g ,$b ))
}
fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}


ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

A similar approach but much faster, with video input, and some bug fixes too.

It uses split_ycbcr on the frame, then scales the Y component to half size,
loops through the pixels, and overlays the values with themselves (or uses screen or multiply) according to the ERB spectrum.

Then it scales Y up again and uses split_ycbcr to get the image back.
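
In code, that round trip looks roughly like this (distilled from the script below):

Code: Select all

split_ycbcr( 0, imgbuf, imgY, imgCb, imgCr )           // frame -> Y, Cb, Cr planes
copy( imgW_2, imgY )
resize( imgW_2, xsize/2, ysize/2, -1, RESIZE_INTERP2 ) // work on the half-size Y plane
// ...per-pixel overlay/screen/multiply on imgW_2 according to the spectrum...
resize( imgW_2, xsize, ysize, -1, RESIZE_INTERP2 )
split_ycbcr( 1, imgW, imgW_2, imgCb, imgCr )           // planes -> RGB frame again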

The effect is mixed with a trail effect:
the screen is captured, scaled a bit, and mixed back in.
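
The trail in isolation (again distilled from the script below):

Code: Select all

copy( screenclone, scr )                                              // grab the last screen
resize( screenclone, xsize + 3, ysize + 2, -1, RESIZE_COLOR_INTERP2 ) // stretch it slightly
transp( 255 ) pixi( screenclone )                                     // paint it back opaque
transp( 31 ) pixi( imgW )                                             // mix the new processed frame on top
resize( screenclone, xsize, ysize, -1, RESIZE_COLOR_INTERP2 )         // restore size for the next copy()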

At a resolution of 640x360 it is fast enough to be useful, although Y is reduced in resolution.

It needs some tweaking, but the script is a good starting point for further effects now.

for reference:

Code: Select all

set_pixel_size( WINDOW_XSIZE / 640 )
resize( get_screen(), 640, 360 )

scr = get_screen()
xsize = 640//get_xsize( scr )
ysize = 360//get_ysize( scr )
fps = 25

// INPUT VIDEO
mpath = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/astron_360.mp4"

//Input WAV file:
filename = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/drums.wav"
preamp = 0.3// amplify audio for analysis , effect amount
//

//Output MJPEG AVI video file:
avi_filename = "test_out.avi"

// Effect:
bands = 9 // number of bands
offsetmax = 3 // dynamic offset in pixels


include "../../lib/ffmpeg_video_export.pixi"
include "../../lib/ffmpeg_video_import.pixi"
include "../../lib/mjpeg.pixi"



framecount = 0
startframe = 1
maxframe = 7*25

vid_import = ffmpeg_video_import_open( 
mpath, 
xsize, ysize, 
startframe, maxframe )



wav = load( filename )
//Sound options:
sample_rate_scale = 1
logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------


gfx_init()
start_timer(0)

// MAIN LOOP ####################################################
while( framecount < maxframe  )
{

anaudio(wav_ptr)
modulator = sin( 2*M_PI*framecount/25)

if framecount % 4 {// alternatively use every Nth frame
//ffmpeg_video_import_read(vid_import,imgbuf)
}
ffmpeg_video_import_read(vid_import,imgbuf)



copy(screenclone, scr)
resize(screenclone,xsize+3,ysize+2,-1,RESIZE_COLOR_INTERP2)

split_ycbcr( 0, imgbuf, imgY, imgCb, imgCr )
//split_rgb( 1, imgY, imgY, imgY, imgY )


copy(imgW_2, imgY)
resize(imgW_2, xsize/2,ysize/2,-1, RESIZE_INTERP2)

// OVERLAY MODE

for( $i = 0; $i < get_size(imgW_2);$i+1){
$m1f = 1 - lmapsmoo[imgW_2[$i]]
if imgW_2[$i] < 127 {
imgW_2[$i] = ( imgW_2[$i]*$m1f + (  (imgW_2[$i] * imgW_2[$i]) >> 7 )* lmapsmoo[imgW_2[$i] ])& 255
}else{
$temp = 255 - imgW_2[$i]
imgW_2[$i] = ( imgW_2[$i]*$m1f + (255 -   ($temp * $temp) >> 7 )* lmapsmoo[imgW_2[$i]] )& 255
}
}


// SCREEN MODE, not used :
/*
for( $i = 0; $i < get_size(imgW_2);$i+1){
$m1f = 1 - lmapsmoo[imgW_2[$i]]
$temp = 255 - imgW_2[$i]
imgW_2[$i] = ( imgW_2[$i]*$m1f + (255 -   ($temp * $temp) >> 8 )* lmapsmoo[imgW_2[$i]] )& 255
}
*/

// MULTIPLY  MODE, not used :
/*
for( $i = 0; $i < get_size(imgW_2);$i+1){
$m1f = 1 - lmapsmoo[imgW_2[$i]]
imgW_2[$i] = (( imgW_2[$i]*$m1f +  (imgW_2[$i] * imgW_2[$i]) >> 8 )* lmapsmoo[imgW_2[$i]] )& 255
}
*/

resize(imgW_2, xsize,ysize,-1, RESIZE_INTERP2)
split_ycbcr( 1, imgW, imgW_2, imgCb, imgCr )
//split_rgb( 1, imgW, imgW_2, imgW_2, imgW_2 )

//t_reset()
//t_rotate(lmapsmoo[1],0,0,1)
transp(255)
pixi(screenclone)
//t_reset()
transp(31)
//pixi(imgbuf)
pixi(imgW)

resize(screenclone,xsize,ysize,-1,RESIZE_COLOR_INTERP2)

// ERB Spectrum for testing
/*
for ($i = 0; $i <256 ; $i+1){
line(-127 + $i, 160 -lmapsmoo[$i]*32,-127 + $i, 160, get_color($i,$i,$i))
}
*/

//
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    //while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } 
    }
    framecount + 1;     
}




//#######################################################################


ffmpeg_video_import_close(vid_import)
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

// ####################################################################
// FUNCTIONS ####################################################
fn gfx_init()
{
// initial and global values
	preampdivwavmax = preamp / wav_amp_max  	
	ftsize = 4096
	ftbinf = wav.sample_rate/ftsize
	ftbinfrez = 1 / ftbinf
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
 	ftmagsmoo = new( hftsize, 1, FLOAT )

	winlen = 2* wav.sample_rate/fps 
	wintable = new( winlen, 1, FLOAT )
	$i = 0 while $i <= winlen{
	wintable[$i] = cos(2*M_PI*$i/winlen)*0.5 + 0.5
	$i + 1
	}	
	
	// ERB scale
	erbsize = bands * 2
	erbbuf = new( erbsize, 1, FLOAT)
	erbiftlookup = new( erbsize, 1, INT32)
	
	// ERB filter lookup
	//
	$size = erbsize
	$ERBmin = 1
	$ERBrange = 40 -$ERBmin

	$i = 0 while $i < erbsize
	{
	$erb = $ERBrange*$i/(erbsize) + $ERBmin
	$erbf = ($erb) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	erbiftlookup[$i] = ($erbf * ftbinfrez) div 1
	$i +1
	}
	
	// create weights
	ftweight = new( hftsize, 1, FLOAT )
	$i = 0 while $i < erbsize{
		$fromidx = erbiftlookup[$i ]
		$toidx = erbiftlookup[$i + 1 ] -1	
		$d = $toidx - $fromidx 
		$j = 0 while $j < $d{
			$ii = $fromidx +$j
			$w = $j / ($d -1)	
		ftweight[$ii] = $w
			$j = $j +1
		}
 	$i +1
 	}
	
	//----------
	
	// 256 lookup from spectrum
	lmap = new( 256, 1, FLOAT) 
	lmapsmoo = new( 256, 1, FLOAT) // smoothed version
	
	C_GREY = get_color(127,127,127) // neutral grey
	C_erb = WHITE
	
	pxl32 = new(3, 1, INT32)// working pixel 32 bit
	

imgbuf = new(xsize,ysize)
screenclone = new(xsize,ysize)
imgY = clone(imgbuf)
imgCb = clone(imgbuf)	
imgCr = clone(imgbuf)
imgW = clone(imgbuf)
imgW_2 = clone(imgbuf)

}

fn anaudio(){

// FFT
	clean(ftbufi)
    	clean(ftbufr)

    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	if $i <=  winlen{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = wav[ $p + $i * wav_channels ] * preampdivwavmax * wintable[$i]
	}else{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = 0
	}
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)
	
	// map on ERB scale 
	// for color
	$erbgrav = 0
	$erbvmax = 0
	$erbsum = 0
	$i = 0 while $i < erbsize
	{
	erbbuf[$i]=0
	// pass 1 up
	$fromidx = erbiftlookup[$i ]
	$toidx = erbiftlookup[$i + 1 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * ftweight[$ii]		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	// pass 2 down
	$fromidx = erbiftlookup[$i + 1 ]
	$toidx = erbiftlookup[$i +2 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * (1-ftweight[$ii])		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	$erbsum = $erbsum + erbbuf[$i]
	$erbgrav = $erbgrav + erbbuf[$i] * ($i -erbsize/2)
	$i + 1
	}
	
	if $erbvmax > 1 { $erbvmax = 1 } 
	//$cerbold = C_erb
	C_erb = get_blend(C_erb, hsvrgb( 2880 - $erbgrav * (360/8), $erbsum,0.85 + 0.5 *$erbsum),12)
	//------------------------------------------
	
	// create light lookup map
	$size = 256
	$i = 0 while $i < $size 
	{ 	
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	
	$erbi = $erbi div 1
	$v = erbbuf[$erbi] 
	if $erbi < erbsize -1 {
	$v2 = erbbuf[$erbi +1] 	
	$v = $v + ($v2 - $v) * $frac}
		
	lmap[$i] = (atodb($v) +54 )/54 
	

	if lmap[$i] > lmapsmoo[$i] {
		lmapsmoo[$i] = lmap[$i]
	}else{ lmapsmoo[$i] = lmapsmoo[$i]*0.96}//84}
	
	if lmapsmoo[$i] > 1 { lmapsmoo[$i] = 1}
	if lmapsmoo[$i] < 0 { lmapsmoo[$i] = 0}
	lmapsq[$i] = lmapsmoo[$i]
	$i + 1
	}	
ret(1)
}


fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}

fn atodb($a){
// converts amplitude to dB
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
// converts db to linear amplitude
	$a = pow(10,($db)*0.05)
ret($a)
}

fn hsvrgb($h,$s,$v){
// HSV to RGB
$h = $h % 360
$M = 255*$v
$m = $M*(1- $s)
$z = 255*($v*$s)*(1-abs(mod(($h/60.0),2)-1))
if $h < 60 {
$r = $M
$g = $z + $m
$b = $m
}else{
if $h < 120 {
$r = $z + $m
$g = $M 
$b = $m
}else{
if $h < 180 {
$r = $m 
$g = $M 
$b = $z + $m
}else{
if $h < 240 {
$r = $m 
$g = $z + $m
$b = $M 
}else{
if $h < 300 {
$r = $z + $m
$g = $m 
$b = $M 
}else{
$r = $M 
$g = $m 
$b = $z + $m
}}}}
}
ret(get_color($r ,$g ,$b ))
}



ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

I deleted the previous version. There had been a lot of mistakes, including type mismatches of containers etc.

Now it works as intended; a video will be added later.

What it does:

It takes an FFT of the audio,
converts it to N equally spaced bands according to human hearing
(7 in the next example),
and creates an interpolated lookup of 256 values from this, with a peak follower/decay.
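
That lookup step, distilled from the code below, looks roughly like this:

Code: Select all

// erbbuf holds the band magnitudes; lmap/lmapsmoo are 256-entry FLOAT containers.
$i = 0 while $i < 256
{
    $pos = $i * erbsize / 256              // fractional band index for this entry
    $b = $pos div 1  $frac = $pos - $b
    $v = erbbuf[ $b ]
    if $b < erbsize - 1 { $v = $v + ( erbbuf[ $b + 1 ] - $v ) * $frac } // interpolate between bands
    lmap[ $i ] = ( atodb( $v ) + 54 ) / 54 // map -54..0 dB to 0..1
    if lmap[ $i ] > lmapsmoo[ $i ]
    {
	lmapsmoo[ $i ] = lmap[ $i ]        // peak follower
    }
    else
    {
	lmapsmoo[ $i ] = lmapsmoo[ $i ] * 0.96 // decay
    }
    $i + 1
}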

It takes an image or a frame from the video,
converts it to YCbCr, and uses the luminance value to look up a value in the table.

It overlays the luminance with itself ("overlay" blending mode, which is "multiply" for < 127 and "screen" for > 127),
with the intensity from the lookup, based on the pixel's brightness and the loudness in the spectrum at the corresponding frequency.

This means dark pixels get darker when a low frequency is loud, and bright pixels get brighter when a high frequency is loud.

It creates an alpha mask from that as well.

Then the last frame from the screen is taken, stretched according to the overall loudness,
and blended on with that alpha.

The new frame, which is now darker and brighter depending on the spectrum, is blended on top with the same alpha:

loud frequencies are less transparent and are updated more.

The effect is:

blinking of brightness/darkness, time delay, and spatial widening, depending on the audio.

The outcome depends very much on the music and images.

An example will follow in a few minutes, but you need to try this with your own stuff, too.
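
The "overlay with itself" step, written out as a small helper for clarity (a sketch of the idea as described above; the script below inlines it per pixel and may weight it slightly differently):

Code: Select all

// $y is the 8-bit luminance, $a the 0..1 amount from the spectrum lookup
fn overlay_self( $y, $a )
{
    if $y < 127
    {
	$o = ( $y * $y ) >> 7           // "multiply" half of overlay
    }
    else
    {
	$t = 255 - $y
	$o = 255 - ( ( $t * $t ) >> 7 ) // "screen" half of overlay
    }
    ret( $y * ( 1 - $a ) + $o * $a )    // crossfade original and overlaid value
}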

Code for reference:

Code: Select all

set_pixel_size( WINDOW_XSIZE / 640 )
resize( get_screen(), 640, 360 )

scr = get_screen()
xsize = 640//get_xsize( scr )
ysize = 360//get_ysize( scr )
fps = 20

// INPUT VIDEO
mpath = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/dance_loop_long_360.mp4"

//Input WAV file:
filename = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/audio.wav"
preamp = 1.4// amplify audio for analysis , effect amount
//

//Output MJPEG AVI video file:
avi_filename = "test_out 7b 3.avi"

// Effect:
bands = 9 // number of bands
offsetmax = 1.8// dynamic offset in pixels

include "../../lib/ffmpeg_video_export.pixi"
include "../../lib/ffmpeg_video_import.pixi"
include "../../lib/mjpeg.pixi"

framecount = 0
startframe = 1
maxframe = ((5*60)+22)*20

vid_import = ffmpeg_video_import_open( 
mpath, 
xsize, ysize, 
startframe, maxframe )

wav = load( filename )
//Sound options:
sample_rate_scale = 1
logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------


gfx_init()
start_timer(0)

// MAIN LOOP ####################################################
while( framecount < maxframe *2 )
{

anaudio(wav_ptr)

ffmpeg_video_import_read(vid_import,imgbuf)
if framecount == 0 {
pixi(imgbuf)
}

copy(screenclone, scr)

split_ycbcr( 0, imgbuf, imgY, imgCb, imgCr )
split_ycbcr( 0, screenclone, imgW_AY, imgW_ACb, imgW_ACr )

//copy(imgW, imgY)

// OVERLAY MODE
for( $i = 0; $i < get_size(imgY);$i+1){
$rY = get_red(imgY[$i])
$m1f = 1 - lmapsmoo[$rY]
if $rY < 127 {
$temp= ( $rY*$m1f +  ($rY * $rY ) >> 7 )* lmapsmoo[$rY]  
}else{
$temp = 255 - $rY
$temp = ( $rY*$m1f + (255 - ($temp * $temp) >> 7 )* lmapsmoo[$rY] )
}
//imgW_A[$i] = 127*lmapsmoo[get_red(imgW_AY[$i])]  + 127*lmapsmoo[$rY]
imgW_A[$i] = 63*lmapsmoo[get_red(imgW_AY[$i])]  + 191*lmapsmoo[$rY]
imgW[$i] = get_color($temp,0,0)
}
//op_cn(OP_RSHIFT,imgW_A, 1)
//op_cn(OP_ADD, imgW_A, 127)
op_cn(OP_MUL,imgW_A, 0.75)
op_cn(OP_ADD, imgW_A, 63)


split_ycbcr( 1, imgW, imgW, imgCb, imgCr )


transp(255)//247)
pixi(screenclone,0,0,WHITE,(xsize+offsetmax*lmapsum)/xsize,(ysize+offsety*lmapsum)/ysize)

transp(47)//31
//pixi(imgW)
pixi(imgW)


// ERB Spectrum for testing
/*
for ($i = 0; $i <256 ; $i+1){
line(-127 + $i, 160 -lmapsmoo[$i]*32,-127 + $i, 160, get_color($i,$i,$i))
}
*/

//
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    //while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } 
    }
    framecount + 1;     
}




//#######################################################################


ffmpeg_video_import_close(vid_import)
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

// ####################################################################
// FUNCTIONS ####################################################
fn gfx_init()
{
// initial and global values
	preampdivwavmax = preamp / wav_amp_max  	
	ftsize = 4096
	ftbinf = wav.sample_rate/ftsize
	ftbinfrez = 1 / ftbinf
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
 	ftmagsmoo = new( hftsize, 1, FLOAT )

	winlen = 2* wav.sample_rate/fps 
	wintable = new( winlen, 1, FLOAT )
	$i = 0 while $i <= winlen{
	wintable[$i] = cos(2*M_PI*$i/winlen)*0.5 + 0.5
	$i + 1
	}	
	
	// ERB scale
	erbsize = bands * 2
	erbbuf = new( erbsize, 1, FLOAT)
	erbiftlookup = new( erbsize, 1, INT32)
	
	// ERB filter lookup
	//
	$size = erbsize
	$ERBmin = 1
	$ERBrange = 40 -$ERBmin

	$i = 0 while $i < erbsize
	{
	$erb = $ERBrange*$i/(erbsize) + $ERBmin
	$erbf = ($erb) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	erbiftlookup[$i] = ($erbf * ftbinfrez) div 1
	$i +1
	}
	
	// create weights
	ftweight = new( hftsize, 1, FLOAT )
	$i = 0 while $i < erbsize{
		$fromidx = erbiftlookup[$i ]
		$toidx = erbiftlookup[$i + 1 ] -1	
		$d = $toidx - $fromidx 
		$j = 0 while $j < $d{
			$ii = $fromidx +$j
			$w = $j / ($d -1)	
		ftweight[$ii] = $w
			$j = $j +1
		}
 	$i +1
 	}
	
	//----------
	
	// 256 lookup from spectrum
	lmap = new( 256, 1, FLOAT) 
	lmapsmoo = new( 256, 1, FLOAT) // smoothed version
	
	C_GREY = get_color(127,127,127) // neutral grey
	C_erb = WHITE
	
	pxl32 = new(3, 1, INT32)// working pixel 32 bit
	
offsety = offsetmax*9/16

imgbuf = new(xsize,ysize)// frame
screenclone = new(xsize,ysize)// screen copy
imgY = clone(imgbuf)
imgCb = clone(imgbuf)	
imgCr = clone(imgbuf)
imgW = clone(imgbuf) // working buffer
imgW_ACb = clone(imgbuf)	
imgW_ACr = clone(imgbuf)	
imgW_AY = clone(imgbuf)	
imgW_A = new(xsize,ysize, INT8)// alpha channel buffer
set_alpha( screenclone, imgW_A )
set_alpha( imgW, imgW_A )
set_flags(screenclone, RESIZE_COLOR_INTERP2)
set_flags(imgW_A, RESIZE_INTERP2)
}

fn anaudio(){

// FFT
	clean(ftbufi)
    	clean(ftbufr)

    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	if $i <=  winlen{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = wav[ $p + $i * wav_channels ] * preampdivwavmax * wintable[$i]
	}else{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = 0
	}
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)
	
	// map on ERB scale 
	// for color
	$erbgrav = 0
	$erbvmax = 0
	$erbsum = 0
	$i = 0 while $i < erbsize
	{
	erbbuf[$i]=0
	// pass 1 up
	$fromidx = erbiftlookup[$i ]
	$toidx = erbiftlookup[$i + 1 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * ftweight[$ii]		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	// pass 2 down
	$fromidx = erbiftlookup[$i + 1 ]
	$toidx = erbiftlookup[$i +2 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * (1-ftweight[$ii])		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	$erbsum = $erbsum + erbbuf[$i]
	$erbgrav = $erbgrav + erbbuf[$i] * ($i -erbsize/2)
	$i + 1
	}
	
	if $erbvmax > 1 { $erbvmax = 1 } 
	//$cerbold = C_erb
	C_erb = get_blend(C_erb, hsvrgb( 2880 - $erbgrav * (360/8), $erbsum,0.85 + 0.5 *$erbsum),12)
	//------------------------------------------
	
	// create light lookup map
	lmapsum=0
	$size = 256
	$i = 0 while $i < $size 
	{ 	
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	
	$erbi = $erbi div 1
	$v = erbbuf[$erbi] 
	if $erbi < $size -1 {
	$v2 = erbbuf[$erbi +1] 	
	$v = $v + ($v2 - $v) * $frac}
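	// map the interpolated band magnitude from -54 dB .. 0 dB to a 0..1 lightness value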
		
	lmap[$i] = (atodb($v) +54 )/54 
	

	if lmap[$i] > lmapsmoo[$i] {
		lmapsmoo[$i] = lmap[$i]
	}else{ lmapsmoo[$i] = lmapsmoo[$i]*0.96}//84}
	
	if lmapsmoo[$i] > 1 { lmapsmoo[$i] = 1}
	if lmapsmoo[$i] < 0 { lmapsmoo[$i] = 0}
	lmapsum = lmapsum + lmapsmoo[$i]/bands
	$i + 1
	}	
ret(1)
}


fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}

fn atodb($a){
// converts amplitude to dB
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
// converts db to linear amplitude
	$a = pow(10,($db)*0.05)
ret($a)
}

fn hsvrgb($h,$s,$v){
// HSV to RGB
$h = $h % 360
$M = 255*$v
$m = $M*(1- $s)
$z = 255*($v*$s)*(1-abs(mod(($h/60.0),2)-1))
if $h < 60 {
$r = $M
$g = $z + $m
$b = $m
}else{
if $h < 120 {
$r = $z + $m
$g = $M 
$b = $m
}else{
if $h < 180 {
$r = $m 
$g = $M 
$b = $z + $m
}else{
if $h < 240 {
$r = $m 
$g = $z + $m
$b = $M 
}else{
if $h < 300 {
$r = $z + $m
$g = $m 
$b = $M 
}else{
$r = $M 
$g = $m 
$b = $z + $m
}}}}
}
ret(get_color($r ,$g ,$b ))
}



ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

Renders at 3.7 fps for 640×360 on a Raspberry Pi 400,
which means it should run in realtime on office machines or faster.
Probably also on modern phones.

ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

I moved all effect parameters up to the header so they can be changed easily.

Also added pre- and post histogramme equalisation; it's slow but optional, and you can switch them off with flags in the effect settings.
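For reference, the equalisation is just a cumulative-histogram lookup: each brightness value v gets replaced by 255 * CDF(v). A minimal sketch of that mapping, using the same container ops as eq_red() in the full listing:

Code: Select all

// eqtable[v] = 255 * CDF(v): running sum of the normalised histogramme, scaled to 0..255
copy(eqtable, histogramme)        // histogramme[] already sums to 1 (filled by get_hist_red())
op_cn(OP_H_INTEGRAL, eqtable, 1)  // cumulative sum -> distribution function
op_cn(OP_SMUL, eqtable, 255)      // scale to the 0..255 brightness range
// every pixel brightness v is then looked up as eqtable[v]

Full listing: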

Code: Select all

set_pixel_size( WINDOW_XSIZE / 640 )
resize( get_screen(), 640, 360 )

scr = get_screen()
xsize = 640//get_xsize( scr )
ysize = 360//get_ysize( scr )
fps = 20

// INPUT VIDEO
mpath = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/astron_360.mp4"

//Input WAV file:
filename = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/drums.wav"
//

//Output MJPEG AVI video file:
avi_filename = "test_out 7b 3.avi"

// Effect Settings:
preamp = 1.5// amplify audio for analysis, amplifies effect amount only, not the audio
bands = 9 // number of bands
offsetmax = 2.6// dynamic offset in pixels
preeq = 1 // histogramme pre-equalisation flag- slow
posteq = 1  // histogramme post equalisation flag - slow
frametransparency = 63 // base transp of new frame
alphabalance = 63// balance of background (0) and foreground (255) weights for alpha channel
alphamin = 7 // alpha channel minimum opacity 0-255

//################################################################################

include "../../lib/ffmpeg_video_export.pixi"
include "../../lib/ffmpeg_video_import.pixi"
include "../../lib/mjpeg.pixi"

framecount = 0
startframe = 1
maxframe = ((5*60)+22)*20

vid_import = ffmpeg_video_import_open( 
mpath, 
xsize, ysize, 
startframe, maxframe )

wav = load( filename )
//Sound options:
sample_rate_scale = 1
logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------


gfx_init()
start_timer(0)

// MAIN LOOP ####################################################
while( framecount < maxframe *2 )
{

anaudio(wav_ptr)

ffmpeg_video_import_read(vid_import,imgbuf)
if framecount == 0 {
pixi(imgbuf)
}

copy(screenclone, scr)

split_ycbcr( 0, imgbuf, imgY, imgCb, imgCr )
split_ycbcr( 0, screenclone, imgW_AY, imgW_ACb, imgW_ACr )

//copy(imgW, imgY)

if preeq { imgY = eq_red(imgY) }

// OVERLAY MODE
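// self-overlay contrast curve on the luma (roughly: dark pixels -> y*y/128, bright -> 255-(255-y)^2/128),
// mixed with the untouched value by the audio-driven amount lmapsmoo[y], indexed by the pixel's own brightness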
for( $i = 0; $i < get_size(imgY);$i+1){
$rY = get_red(imgY[$i])
$m1f = 1 - lmapsmoo[$rY]
if $rY < 127 {
$temp= ( $rY*$m1f +  ($rY * $rY ) >> 7 )* lmapsmoo[$rY]  
}else{
$temp = 255 - $rY
$temp = ( $rY*$m1f + (255 - ($temp * $temp) >> 7 )* lmapsmoo[$rY] )
}
//imgW_A[$i] = 127*lmapsmoo[get_red(imgW_AY[$i])]  + 127*lmapsmoo[$rY]
imgW_A[$i] = alphabalance*lmapsmoo[get_red(imgW_AY[$i])]  + (255- alphabalance)*lmapsmoo[$rY]
imgW[$i] = get_color($temp,0,0)
}
//op_cn(OP_RSHIFT,imgW_A, 1)
//op_cn(OP_ADD, imgW_A, 127)
op_cn(OP_MUL,imgW_A, (255-alphamin)/255)
op_cn(OP_ADD, imgW_A, alphamin)

if posteq { imgW = eq_red(imgW) }


split_ycbcr( 1, imgW, imgW, imgCb, imgCr )


transp(255)//247)
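// feedback: redraw the previous frame scaled up by a few pixels (offsetmax*lmapsum), so the picture breathes with the overall level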
pixi(screenclone,0,0,WHITE,(xsize+offsetmax*lmapsum)/xsize,(ysize+offsety*lmapsum)/ysize)

transp(frametransparency)//31
//pixi(imgW)
pixi(imgW)


// ERB Spectrum for testing
/*
for ($i = 0; $i <256 ; $i+1){
line(-127 + $i, 160 -lmapsmoo[$i]*32,-127 + $i, 160, get_color($i,$i,$i))
}
*/

//
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    //while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } 
    }
    framecount + 1;     
}




//#######################################################################


ffmpeg_video_import_close(vid_import)
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

// ####################################################################
// FUNCTIONS ####################################################
fn gfx_init()
{
// initial and global values
	preampdivwavmax = preamp / wav_amp_max  	
	ftsize = 4096
	ftbinf = wav.sample_rate/ftsize
	ftbinfrez = 1 / ftbinf
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
 	ftmagsmoo = new( hftsize, 1, FLOAT )

	winlen = 2* wav.sample_rate/fps 
	wintable = new( winlen + 1, 1, FLOAT ) // +1: the fill loop below and anaudio() both index up to [winlen]
	$i = 0 while $i <= winlen{
	wintable[$i] = cos(2*M_PI*$i/winlen)*0.5 + 0.5
	$i + 1
	}	
	
	// ERB scale
	erbsize = bands * 2
	erbbuf = new( erbsize, 1, FLOAT)
	erbiftlookup = new( erbsize, 1, INT32)
	
	// ERB filter lookup
	//
	$size = erbsize
	$ERBmin = 1
	$ERBrange = 40 -$ERBmin

	$i = 0 while $i < erbsize
	{
	$erb = $ERBrange*$i/(erbsize) + $ERBmin
	$erbf = ($erb) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	erbiftlookup[$i] = ($erbf * ftbinfrez) div 1
	$i +1
	}
	
	// create weights
	ftweight = new( hftsize, 1, FLOAT )
	$i = 0 while $i < erbsize{
		$fromidx = erbiftlookup[$i ]
		$toidx = erbiftlookup[$i + 1 ] -1	
		$d = $toidx - $fromidx 
		$j = 0 while $j < $d{
			$ii = $fromidx +$j
			$w = $j / ($d -1)	
		ftweight[$ii] = $w
			$j = $j +1
		}
 	$i +1
 	}
	
	//----------
	
	// 256 lookup from spectrum
	lmap = new( 256, 1, FLOAT) 
	lmapsmoo = new( 256, 1, FLOAT) // smoothed version
	
	C_GREY = get_color(127,127,127) // neutral grey
	C_erb = WHITE
	
	pxl32 = new(3, 1, INT32)// working pixel 32 bit
	
offsety = offsetmax*9/16

histogramme = new(256,1,FLOAT)
eqtable = new(256,1,FLOAT)

imgbuf = new(xsize,ysize)// frame
screenclone = new(xsize,ysize)// screen copy
imgY = clone(imgbuf)
imgCb = clone(imgbuf)	
imgCr = clone(imgbuf)
imgW = clone(imgbuf) // working buffer
imgW_ACb = clone(imgbuf)	
imgW_ACr = clone(imgbuf)	
imgW_AY = clone(imgbuf)	
imgW_A = new(xsize,ysize, INT8)// alpha channel buffer
set_alpha( screenclone, imgW_A )
set_alpha( imgW, imgW_A )
set_flags(screenclone, RESIZE_COLOR_INTERP2)
set_flags(imgW_A, RESIZE_INTERP2)
}

fn anaudio(){

// FFT
	clean(ftbufi)
    	clean(ftbufr)

    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	if $i <=  winlen{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = wav[ $p + $i * wav_channels ] * preampdivwavmax * wintable[$i]
	}else{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = 0
	}
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)
	
	// map on ERB scale 
	// for color
	$erbgrav = 0
	$erbvmax = 0
	$erbsum = 0
	$i = 0 while $i < erbsize
	{
	erbbuf[$i]=0
	// pass 1 up
	$fromidx = erbiftlookup[$i ]
	$toidx = erbiftlookup[$i + 1 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * ftweight[$ii]		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	// pass 2 down
	$fromidx = erbiftlookup[$i + 1 ]
	$toidx = erbiftlookup[$i +2 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * (1-ftweight[$ii])		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	$erbsum = $erbsum + erbbuf[$i]
	$erbgrav = $erbgrav + erbbuf[$i] * ($i -erbsize/2)
	$i + 1
	}
	
	if $erbvmax > 1 { $erbvmax = 1 } 
	//$cerbold = C_erb
	C_erb = get_blend(C_erb, hsvrgb( 2880 - $erbgrav * (360/8), $erbsum,0.85 + 0.5 *$erbsum),12)
	//------------------------------------------
	
	// create light lookup map
	lmapsum=0
	$size = 256
	$i = 0 while $i < $size 
	{ 	
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	
	$erbi = $erbi div 1
	$v = erbbuf[$erbi] 
	if $erbi < $size -1 {
	$v2 = erbbuf[$erbi +1] 	
	$v = $v + ($v2 - $v) * $frac}
		
	lmap[$i] = (atodb($v) +54 )/54 
	

	if lmap[$i] > lmapsmoo[$i] {
		lmapsmoo[$i] = lmap[$i]
	}else{ lmapsmoo[$i] = lmapsmoo[$i]*0.96}//84}
	
	if lmapsmoo[$i] > 1 { lmapsmoo[$i] = 1}
	if lmapsmoo[$i] < 0 { lmapsmoo[$i] = 0}
	lmapsum = lmapsum + lmapsmoo[$i]/bands
	$i + 1
	}	
ret(1)
}


fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}

fn atodb($a){
// converts amplitude to dB
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
// converts db to linear amplitude
	$a = pow(10,($db)*0.05)
ret($a)
}

fn hsvrgb($h,$s,$v){
// HSV to RGB
$h = $h % 360
$M = 255*$v
$m = $M*(1- $s)
$z = 255*($v*$s)*(1-abs(mod(($h/60.0),2)-1))
if $h < 60 {
$r = $M
$g = $z + $m
$b = $m
}else{
if $h < 120 {
$r = $z + $m
$g = $M 
$b = $m
}else{
if $h < 180 {
$r = $m 
$g = $M 
$b = $z + $m
}else{
if $h < 240 {
$r = $m 
$g = $z + $m
$b = $M 
}else{
if $h < 300 {
$r = $z + $m
$g = $m 
$b = $M 
}else{
$r = $M 
$g = $m 
$b = $z + $m
}}}}
}
ret(get_color($r ,$g ,$b ))
}

fn get_hist_red($img){
	clean(histogramme)
	$i = 0 
	$fac = 1/( get_size($img) )
	while $i < get_size($img){
	histogramme[get_red($img[$i])] = histogramme[get_red($img[$i])] + $fac
	$i + 1
	}
ret(1)
}


fn eq_red($img){
	get_hist_red($img)	
	copy(eqtable, histogramme)
	op_cn(OP_H_INTEGRAL, eqtable, 1)
	op_cn(OP_SMUL, eqtable, 255)
	for( $i=0; $i < get_size($img); $i + 1){
		$img[$i] = get_color( eqtable[get_red($img[$i])] , 0, 0)
	}
ret($img)
}


ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

changes:

better parameter values
equalisation is now a variable mix with the original, 0..255 (see the sketch below)
effect squared for a more dynamic response
blending of the stretched old frames in 3 layers with different amounts, for more smoothness and variation
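A minimal sketch of the variable equalisation mix and of the squaring (same names as in the listing; $amount stands for preeq/255 or posteq/255):

Code: Select all

// variable mix between equalised and original brightness, $amount in 0..1
$v = get_red($img[$i])
$img[$i] = get_color( eqtable[$v]*$amount + $v*(1 - $amount), 0, 0 )

// effect squared for a more dynamic response (applied after the 0..1 clamp)
lmapsmoo[$i] = lmapsmoo[$i] * lmapsmoo[$i]

Full listing: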

Code: Select all

set_pixel_size( WINDOW_XSIZE / 640 )
resize( get_screen(), 640, 360 )

scr = get_screen()
xsize = 640//get_xsize( scr )
ysize = 360//get_ysize( scr )
fps = 20

// INPUT VIDEO
mpath = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/ciel 1.mp4"

//Input WAV file:
filename = "/home/nancy/Documents/pixilang/pixilang3/examples/myscripts/media/Ciel Ouvert.wav"
//

//Output MJPEG AVI video file:
avi_filename = "ciel ouvert 4 varieq.avi"

// Effect Settings:
preamp = 0.7// amplify audio for analysis, amplifies effect amount only, not the audio
bands = 8 // number of bands
offsetmax = 1.667// dynamic offset in pixels
preeq = 127 // histogramme pre-equalisation - slow if on, 0..255
posteq = 158  // histogramme post equalisation  - slow, 0..255
frametransparency = 63 // base transp of new frame
alphabalance = 95// balance of background (0, past frames) and foreground (255, new frame) weights for alpha channel
alphamin = 127 // alpha channel minimum opacity 0-255

//################################################################################

include "../../lib/ffmpeg_video_export.pixi"
include "../../lib/ffmpeg_video_import.pixi"
include "../../lib/mjpeg.pixi"

framecount = 0
startframe = 1
maxframe = ((5*60)+22)*20

vid_import = ffmpeg_video_import_open( 
mpath, 
xsize, ysize, 
startframe, maxframe )

wav = load( filename )
//Sound options:
sample_rate_scale = 1
logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )
wav_ptr = 0
wav_size = get_size( wav ) //number of frames
wav_channels = wav.channels
wav_amp_max = 256
samplerate = wav.sample_rate
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

if avi_filename != 0
{
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}
//-------------------


gfx_init()
start_timer(0)

// MAIN LOOP ####################################################
while( framecount < maxframe *2 )
{

anaudio(wav_ptr)

ffmpeg_video_import_read(vid_import,imgbuf)
if framecount == 0 {
pixi(imgbuf)
}

copy(screenclone, scr)

split_ycbcr( 0, imgbuf, imgY, imgCb, imgCr )
split_ycbcr( 0, screenclone, imgW_AY, imgW_ACb, imgW_ACr )

//copy(imgW, imgY)

if preeq { imgY = eq_red(imgY,preeq/255) }

// OVERLAY MODE
for( $i = 0; $i < get_size(imgY);$i+1){
$rY = get_red(imgY[$i])
$m1f = 1 - lmapsmoo[$rY]
if $rY < 127 {
$temp= ( $rY*$m1f +  ($rY * $rY ) >> 7 )* lmapsmoo[$rY]  
}else{
$temp = 255 - $rY
$temp = ( $rY*$m1f + (255 - ($temp * $temp) >> 7 )* lmapsmoo[$rY] )
}
//imgW_A[$i] = 127*lmapsmoo[get_red(imgW_AY[$i])]  + 127*lmapsmoo[$rY]
imgW_A[$i] = alphabalance*lmapsmoo[get_red(imgW_AY[$i])]  + (255- alphabalance)*lmapsmoo[$rY]
imgW[$i] = get_color($temp,0,0)
}
//op_cn(OP_RSHIFT,imgW_A, 1)
//op_cn(OP_ADD, imgW_A, 127)
op_cn(OP_MUL,imgW_A, (255-alphamin)/255)
op_cn(OP_ADD, imgW_A, alphamin)

if posteq { imgW = eq_red(imgW, posteq/255) }


split_ycbcr( 1, imgW, imgW, imgCb, imgCr )


transp(255)//247)
pixi(screenclone,0,0,WHITE,(xsize+offsetmax*lmapsum)/xsize,(ysize+offsety*lmapsum)/ysize)
transp(127)
pixi(screenclone,0,0,WHITE,(xsize+offsetmax*lmapsum*0.62)/xsize,(ysize+offsety*lmapsum*0.62)/ysize)
transp(63)
pixi(screenclone,0,0,WHITE,(xsize+offsetmax*lmapsum*0.38)/xsize,(ysize+offsety*lmapsum*0.38)/ysize)

transp(frametransparency)//31
//pixi(imgW)
pixi(imgW)


// ERB Spectrum for testing
/*
for ($i = 0; $i <256 ; $i+1){
line(-127 + $i, 160 -lmapsmoo[$i]*32,-127 + $i, 160, get_color($i,$i,$i))
}

*/
//
    if vo 
    {
	//Video export:
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    if !sample_loop && wav_ptr >= wav_size { breakall }
    //while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { halt } }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } 
    }
    framecount + 1;     
}




//#######################################################################


ffmpeg_video_import_close(vid_import)
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

// ####################################################################
// FUNCTIONS ####################################################
fn gfx_init()
{
// initial and global values
	preampdivwavmax = preamp / wav_amp_max  	
	ftsize = 4096
	ftbinf = wav.sample_rate/ftsize
	ftbinfrez = 1 / ftbinf
	hftsize = ftsize / 2
	ftbufr = new( ftsize, 1, FLOAT )
 	ftbufi = new( ftsize, 1, FLOAT )
 	ftmag = new( hftsize, 1, FLOAT )
 	ftmagsmoo = new( hftsize, 1, FLOAT )

	winlen = 2* wav.sample_rate/fps 
	wintable = new( winlen + 1, 1, FLOAT ) // +1: the fill loop below and anaudio() both index up to [winlen]
	$i = 0 while $i <= winlen{
	wintable[$i] = cos(2*M_PI*$i/winlen)*0.5 + 0.5
	$i + 1
	}	
	
	// ERB scale
	erbsize = bands * 2
	erbbuf = new( erbsize, 1, FLOAT)
	erbiftlookup = new( erbsize, 1, INT32)
	
	// ERB filter lookup
	//
	$size = erbsize
	$ERBmin = 1
	$ERBrange = 40 -$ERBmin

	$i = 0 while $i < erbsize
	{
	$erb = $ERBrange*$i/(erbsize) + $ERBmin
	$erbf = ($erb) * 0.046729
	$erbf = pow(10,$erbf) -1
	$erbf = $erbf * 228.833 
	erbiftlookup[$i] = ($erbf * ftbinfrez) div 1
	$i +1
	}
	
	// create weights
	ftweight = new( hftsize, 1, FLOAT )
	$i = 0 while $i < erbsize{
		$fromidx = erbiftlookup[$i ]
		$toidx = erbiftlookup[$i + 1 ] -1	
		$d = $toidx - $fromidx 
		$j = 0 while $j < $d{
			$ii = $fromidx +$j
			$w = $j / ($d -1)	
		ftweight[$ii] = $w
			$j = $j +1
		}
 	$i +1
 	}
	
	//----------
	
	// 256 lookup from spectrum
	lmap = new( 256, 1, FLOAT) 
	lmapsmoo = new( 256, 1, FLOAT) // smoothed version
	
	C_GREY = get_color(127,127,127) // neutral grey
	C_erb = WHITE
	
	pxl32 = new(3, 1, INT32)// working pixel 32 bit
	
offsety = offsetmax*9/16

histogramme = new(256,1,FLOAT)
eqtable = new(256,1,FLOAT)

imgbuf = new(xsize,ysize)// frame
screenclone = new(xsize,ysize)// screen copy
imgY = clone(imgbuf)
imgCb = clone(imgbuf)	
imgCr = clone(imgbuf)
imgW = clone(imgbuf) // working buffer
imgW_ACb = clone(imgbuf)	
imgW_ACr = clone(imgbuf)	
imgW_AY = clone(imgbuf)	
imgW_A = new(xsize,ysize, INT8)// alpha channel buffer
set_alpha( screenclone, imgW_A )
set_alpha( imgW, imgW_A )
set_flags(screenclone, RESIZE_COLOR_INTERP2)
set_flags(imgW_A, RESIZE_INTERP2)
}

fn anaudio(){

// FFT
	clean(ftbufi)
    	clean(ftbufr)

    $p = wav_ptr
    $t = $p / wav_size
    $i = 0 while $i < ftsize
    {
	if $i <=  winlen{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = wav[ $p + $i * wav_channels ] * preampdivwavmax * wintable[$i]
	}else{
	ftbufi[ $i ] = 0
	ftbufr[ $i ] = 0
	}
	$i + 1
    }	
fft(1, ftbufi,ftbufr,ftsize)
	
	// map on ERB scale 
	// for color
	$erbgrav = 0
	$erbvmax = 0
	$erbsum = 0
	$i = 0 while $i < erbsize
	{
	erbbuf[$i]=0
	// pass 1 up
	$fromidx = erbiftlookup[$i ]
	$toidx = erbiftlookup[$i + 1 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * ftweight[$ii]		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	// pass 2 down
	$fromidx = erbiftlookup[$i + 1 ]
	$toidx = erbiftlookup[$i +2 ] -1	
	$d = $toidx - $fromidx 
	$j = 0 while $j < $d
		{
		$ii = $fromidx + $j
		$v = sqrt(ftbufr[$ii]*ftbufr[$ii]+ftbufi[$ii]*ftbufi[$ii])
			erbbuf[$i] = erbbuf[$i] + $v * (1-ftweight[$ii])		
		$j = $j +1 
		}
	erbbuf[$i] = erbbuf[$i] *80/$d
	$erbsum = $erbsum + erbbuf[$i]
	$erbgrav = $erbgrav + erbbuf[$i] * ($i -erbsize/2)
	$i + 1
	}
	
	if $erbvmax > 1 { $erbvmax = 1 } 
	//$cerbold = C_erb
	C_erb = get_blend(C_erb, hsvrgb( 2880 - $erbgrav * (360/8), $erbsum,0.85 + 0.5 *$erbsum),12)
	//------------------------------------------
	
	// create light lookup map
	lmapsum=0
	$size = 256
	$i = 0 while $i < $size 
	{ 	
	$erbi = ($i * erbsize / $size ) 
	$frac = mod($erbi,1) 
	
	$erbi = $erbi div 1
	$v = erbbuf[$erbi] 
	if $erbi < $size -1 {
	$v2 = erbbuf[$erbi +1] 	
	$v = $v + ($v2 - $v) * $frac}
		
	lmap[$i] = (atodb($v) +54 )/54 
	

	if lmap[$i] > lmapsmoo[$i] {
		lmapsmoo[$i] = lmap[$i]
	}else{ lmapsmoo[$i] = lmapsmoo[$i]*0.92}//96}
	
	if lmapsmoo[$i] > 1 { lmapsmoo[$i] = 1}
	if lmapsmoo[$i] < 0 { lmapsmoo[$i] = 0}

	lmapsum = lmapsum + lmapsmoo[$i]/bands
	lmapsmoo[$i] = lmapsmoo[$i] * lmapsmoo[$i]
	$i + 1
	}	
ret(1)
}


fn audio_callback(
    $stream, 
    $userdata, 
    $channels, 
    $frames, 
    $output_time_in_system_ticks, 
    $in_channels, 
    $latency_in_frames )
{
    if wav_ptr >= wav_size
    {
	if !sample_loop
	{
	    ret( 0 )
	}
    }
    $c = 0 while( $c < wav_channels )
    {
	copy( $channels[ $c ], wav, 0, wav_ptr + $c, $frames, 1, wav_channels )
	$c + 1
    }
    wav_ptr + $frames * wav_channels
    if sample_loop
    {
	if wav_ptr >= wav_size
	{
	    $ff = ( wav_ptr - wav_size ) / wav_channels
	    $p = $frames - $ff
	    wav_ptr = 0
	    $c = 0 while( $c < wav_channels )
	    {
		copy( $channels[ $c ], wav, $p, wav_ptr + $c, $ff, 1, wav_channels )
    		$c + 1
	    }
	    wav_ptr + $ff * wav_channels
	}
    }
    ret( 1 )
}

fn atodb($a){
// converts amplitude to dB
	$db = 20*log10($a)
ret($db)
}
fn dbtoa($db){
// converts db to linear amplitude
	$a = pow(10,($db)*0.05)
ret($a)
}

fn hsvrgb($h,$s,$v){
// HSV to RGB
$h = $h % 360
$M = 255*$v
$m = $M*(1- $s)
$z = 255*($v*$s)*(1-abs(mod(($h/60.0),2)-1))
if $h < 60 {
$r = $M
$g = $z + $m
$b = $m
}else{
if $h < 120 {
$r = $z + $m
$g = $M 
$b = $m
}else{
if $h < 180 {
$r = $m 
$g = $M 
$b = $z + $m
}else{
if $h < 240 {
$r = $m 
$g = $z + $m
$b = $M 
}else{
if $h < 300 {
$r = $z + $m
$g = $m 
$b = $M 
}else{
$r = $M 
$g = $m 
$b = $z + $m
}}}}
}
ret(get_color($r ,$g ,$b ))
}

fn get_hist_red($img){
	clean(histogramme)
	$i = 0 
	$fac = 1/( get_size($img) )
	while $i < get_size($img){
	histogramme[get_red($img[$i])] = histogramme[get_red($img[$i])] + $fac
	$i + 1
	}
ret(1)
}


fn eq_red($img, $amount){
	get_hist_red($img)	
	copy(eqtable, histogramme)
	op_cn(OP_H_INTEGRAL, eqtable, 1)
	op_cn(OP_SMUL, eqtable, 255)
	$am1m = 1 - $amount
	for( $i=0; $i < get_size($img); $i + 1){
		$img[$i] = get_color( eqtable[get_red($img[$i])]*$amount + get_red($img[$i])*$am1m, 0, 0) // mix equalised and original brightness
	}
ret($img)
}


ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

Version 1_0 is ready.

There were still two major mistakes. Made some other improvements too.

Demo video; the zipped code is below the video.


audio driven video fx 1_0.pixi.zip
CODE zipped
(4.57 KiB)
The parameter settings were, IIRC
top right:

preamp = 1
bands = 7
overlayonback = 0
offsetmax = 1.333
preeq = 255
posteq = 255
reverseeq = 255
posteqall = 0
frametransparency = 55
alphamin = 103


bottom left:

preamp = 0.9
bands = 7
overlayonback = 1
offsetmax = 1.333
preeq = 127
posteq = 255
reverseeq = 127
posteqall = 0
frametransparency = 55
alphamin = 103


bottom right:

preamp = 0.9
bands = 7
overlayonback = 0
offsetmax = 2.6
preeq = 0
posteq = 0
reverseeq = 0
posteqall = 0
frametransparency = 95
alphamin = 63

It's quite smooth now.
Also there are speed variations depending on the spectrum.
The blocky artefacts come from the compression of the source material.
ainegil
Posts: 105
Joined: Thu Sep 22, 2022 11:37 pm

Re: Audio driven image processing

Post by ainegil »

Version 1_01 now in COLOR.

Two parameters: one for base saturation, one to increase saturation per auditory band.

Known issues: I don't do proper garbage collection / cleaning of buffers on quit,
so you may need to restart Pixilang if you do multiple renderings.

Demo video will follow in a few minutes.
audio driven video fx 1_01_color.pixi.zip
Code
(5.06 KiB)
EDIT:
Demo Video


The calculations are done on the CbCr plane, taking the headroom of the larger component and modulating each component relative to that headroom with the band's value.
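Roughly, the per-pixel idea is the following (a sketch only; basesat, bandsat and $bandval are hypothetical names for the two new parameters and the band's 0..1 value; the actual code is in the attached zip):

Code: Select all

// hypothetical sketch: push Cb/Cr away from neutral (127); the boost is scaled by the
// headroom of the larger component, so neither chroma channel can clip
$cb = get_red(imgCb[$i]) - 127
$cr = get_red(imgCr[$i]) - 127
$maxc = abs($cb)
if abs($cr) > $maxc { $maxc = abs($cr) }
$headroom = 127 - $maxc
$gain = 1 + ($headroom / 127) * (basesat + bandsat * $bandval) // clip-free while basesat + bandsat*$bandval <= 1
imgCb[$i] = get_color(127 + $cb*$gain, 0, 0)
imgCr[$i] = get_color(127 + $cr*$gain, 0, 0)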