trying to make a wav/sunvox visualizer: a pixilang learner's thread

AutumnCheney · Post by **AutumnCheney** » Sat Oct 08, 2022 4:16 am

actually, i lied: i have another question: how can i "fill in" the space underneath the spectrum? in ainegil's example, it's just lines where the points on the spectrum are, but i'd like the spaces between the lines to be filled as well.

ainegil · Post by **ainegil** » Sat Oct 08, 2022 4:45 pm

I dont have time to make an example right now,

but instead of looping through the fft index, you would loop on a pitch scale (for instance)
and convert pitch to frequency / fft index, and read two (or 4) values from the fft and interpolate between them.
Then you can draw a series of lines to fill.

example , pseudo code

X FROM PITCH 20 TO PITCH 128

CONVERT X TO Hz, CONVERT Hz TO FFT INDEX ( fractional value)

GET AMPLITUDE OF INDEX (floor) , AMPLITUDE OF INDEX (floor +1)

INTERPOLATE AMPLITUDES WITH THE FRACTIONAL REMAINDER (INDEX - floor)

DRAW LINE X, 0, X, amplitude

Edit:

the frequency of an index is Index * SampleRate / FFTSize

so Hz / (SR / FFTSize) is Index

ainegil · Post by **ainegil** » Sat Oct 08, 2022 11:26 pm

here is an example, its also nicer structure than the previous one,
and also has cepstrum which is the spectrum of the spectrum, and
in this very example gives you the notes played (at least its supposed to)
its on a pitch scale

Code: Select all

//
// WAV file player
// How to use: change the SETUP part and run this program to play the specified file
//
// Flow: load the WAV, then either export an MJPEG AVI (when avi_filename is set)
// or play it through audio_callback(); gfx_frame() is called once per loop iteration.
//

// ########################################
// ## SETUP ###############################
// ########################################

//Input WAV file:
filename = "sound_files/Echos Flower.wav"
//filename = "audio.wav"

//Output MJPEG AVI video file:
//(while this stays commented out, the "avi_filename != 0" test below selects live playback)
//avi_filename = "wav_player.avi"

//Graphics options:
//(while these stay commented out, the xsize == 0 and fps == 0 fallbacks below apply)
//xsize = 1280 div 2
//ysize = 720 div 2
//fps = 25
//text = new( 2, 1, INT ) text[ 0 ] = "Some text 1" text[ 1 ] = "Some text 2"
//text = new( 2, 1, INT ) text[ 0 ] = "Piano in SunVox 1.9.2" text[ 1 ] = "Piano in SunVox 1.9.3"
text_scale = 2

//Sound options:
//Multiplier applied to the WAV sample rate for playback and for the exported audio track
sample_rate_scale = 1

// ########################################
// ## SETUP COMPLETE ######################
// ########################################

//Pixel size is derived from the window width and ss (480 unless xsize was set above)
if xsize == 0 { ss = 480 } else { ss = xsize }
set_pixel_size( WINDOW_XSIZE / ss )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )

//Apply the explicit screen size only when both dimensions were given,
//then re-read the real dimensions from the screen container
scr = get_screen()
if xsize != 0 && ysize != 0 { resize( scr, xsize, ysize ) }
xsize = get_xsize( scr )
ysize = get_ysize( scr )
hxsize = xsize / 2
hysize = ysize / 2

//Default frame rate
if fps == 0 { fps = 30 }

wav = load( filename )
logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )
//wav_ptr is the current read position; audio_callback() advances it by frames * channels
wav_ptr = 0
wav_size = get_size( wav ) //container size; NOTE(review): compared against wav_ptr, which advances by frames * channels - confirm whether this is frames or interleaved samples
wav_channels = wav.channels
//wav_amp_max is the divisor gfx_frame() uses to scale samples;
//256 remains in effect for any sample type not listed below
wav_amp_max = 256
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

//MJPEG encoder library (provides the mjpeg_encoder_* functions used below)
include "../../lib/mjpeg.pixi"

if avi_filename != 0
{
    //Video export mode: vo is the flag tested by the main loop and by the cleanup code
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    //vo_audio_buf holds one interleaved chunk; vo_audio_ch_bufs holds one buffer per channel
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    //Live playback mode: the audio system calls audio_callback() to fetch the sound
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}

gfx_init(

)
//Main loop: draw one frame, then either encode it (export) or show it (live)
while 1
{
    gfx_frame()
    if vo 
    {
	//Video export:
	//call the audio callback by hand to pull the next chunk into the per-channel buffers
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	//interleave the per-channel buffers into vo_audio_buf (destination step = wav_channels)
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    //Stop at the end of the file or when the user closes the window
    if wav_ptr >= wav_size { breakall }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } }
}
gfx_deinit()
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

// Allocate the analysis buffers and set the FFT parameters used by gfx_frame().
// Must be called after the WAV has been loaded: the sample rate is taken from it.
fn gfx_init()
{
    vals = new( xsize, 1, FLOAT )
    clean( vals )

    ftsize = 1024          // FFT length in samples
    hftsize = ftsize / 2   // number of spectrum bins that gfx_frame() fills
    qftsize = ftsize / 4   // number of cepstrum bins that gfx_frame() draws

    bufre = new( ftsize, 1, FLOAT )   // FFT real part
    bufim = new( ftsize, 1, FLOAT )   // FFT imaginary part
    bufdb = new( ftsize, 1, FLOAT )   // spectrum in dB
    clean( bufdb )
    cepsre = new( hftsize, 1, FLOAT )
    cepsim = new( hftsize, 1, FLOAT )

    // Use the file's own sample rate so bin frequencies are correct for files
    // that are not 44.1 kHz; 44100 is kept only as a fallback.
    samplerate = wav.sample_rate
    if samplerate <= 0 { samplerate = 44100 }
}

// Release every container created by gfx_init().
fn gfx_deinit()
{
    remove( vals )
    remove( bufre )
    remove( bufim )
    remove( bufdb )
    remove( cepsre )
    remove( cepsim )
}

// Draw one frame: the dB spectrum of the ftsize samples starting at the current
// play position, on a pitch (semitone) scale, plus the experimental cepstrum.
fn gfx_frame()
{
    clear()

    // Take the play position once; the audio callback moves wav_ptr.
    $pos = wav_ptr

    // Smallest amplitude passed to log10(): silent bins become -180 dB
    // instead of -infinity (which turns into NaN when interpolated).
    $amp_floor = 0.000000001

    // read from wav (first channel only), windowed -------------------------
    $i = 0
    while $i < ftsize
    {
        $src = $pos + $i * wav_channels
        $v = 0
        // zero-pad explicitly when the window runs past the end of the file
        if $src < wav_size { $v = wav[ $src ] / wav_amp_max }
        $win = sin( M_PI * $i / ftsize ) // half-sine window
        bufre[ $i ] = $v * $win
        bufim[ $i ] = 0
        $i = $i + 1
    }

    fft( 0, bufim, bufre, ftsize )

    // calculate dB amplitude -------------------
    $i = 0
    while $i < hftsize
    {
        $v = sqrt( bufre[ $i ] * bufre[ $i ] + bufim[ $i ] * bufim[ $i ] ) // amplitude
        if $v < $amp_floor { $v = $amp_floor }
        $v = 20 * log10( $v ) // dB scale
        bufdb[ $i ] = $v

        // input for the optional cepstrum: windowed, weighted dB spectrum
        $win = sin( 0.5 * M_PI * $i / hftsize )
        $filt = pow( 0.5, log2( $i + 1 ) )
        cepsre[ $i ] = $filt * $win * $v / ( $i + 1 )
        cepsim[ $i ] = 0
        $i = $i + 1
    }

    // get cepstrum (same argument order as the first fft() call: im, then re)
    fft( 0, cepsim, cepsre, hftsize )

    // draw dB spectrum -----------------------------------------------------

    binwidth = samplerate / ftsize // Hz per FFT bin
    $pitch = 24 // loop on pitch scale, one column per semitone
    while $pitch < 135
    {
        $f = pow( 2, $pitch / 12 ) * 8.175799 // Hz
        $bin = $f / binwidth // fractional FFT index
        $idx = floor( $bin )
        // Only bins below hftsize hold data; skip pitches above that range
        // (this happens when the sample rate is lower than 44100).
        if $idx + 1 < hftsize
        {
            $frac = $bin - $idx
            $dB = ( 1 - $frac ) * bufdb[ $idx ] + $frac * bufdb[ $idx + 1 ]
            line( $pitch, 120, $pitch, -$dB, WHITE )
            //dot( $pitch, $dB, WHITE )
        }
        $pitch = $pitch + 1
    }

    // draw cepstrum, experimental ------------------------------------------

    $i = 1 // loop through cepstrum
    while $i < qftsize
    {
        $f = samplerate / $i
        $x = log2( $f / 8.175799 ) * 12 // pitch

        $v = sqrt( cepsre[ $i ] * cepsre[ $i ] + cepsim[ $i ] * cepsim[ $i ] ) // amplitude
        if $v < $amp_floor { $v = $amp_floor }
        $v = 20 * log10( $v ) // dB scale

        line( $x - 160, 120, $x - 160, -$v, BLUE )

        $i = $i + 1
    }
}

// Audio callback: copies the next $frames frames of the WAV into the
// per-channel output buffers in $channels and moves the play position on.
// Returns 0 once the play position has reached the end of the WAV, 1 otherwise.
fn audio_callback( $stream, $userdata, $channels, $frames, $output_time_in_system_ticks, $in_channels, $latency_in_frames )
{
    // Nothing left to play
    if wav_ptr >= wav_size { ret( 0 ) }

    // The WAV data is read with a step of wav_channels starting at
    // wav_ptr + channel number, i.e. one channel at a time
    $ch = 0
    while $ch < wav_channels
    {
        copy( $channels[ $ch ], wav, 0, wav_ptr + $ch, $frames, 1, wav_channels )
        $ch = $ch + 1
    }

    wav_ptr = wav_ptr + $frames * wav_channels
    ret( 1 )
}

and here's a video based on that, or rather the other way around: I stripped this code from my video code

AutumnCheney · Post by **AutumnCheney** » Sun Oct 09, 2022 3:35 am

ainegil wrote: ↑Sat Oct 08, 2022 11:26 pm here is an example, its also nicer structure than the previous one,
and also has cepstrum which is the spectrum of the spectrum, and
in this very example gives you the notes played (at least its supposed to)
its on a pitch scale

Code: Select all

//
// WAV file player
// How to use: change the SETUP part and run this program to play the specified file
//
// Flow: load the WAV, then either export an MJPEG AVI (when avi_filename is set)
// or play it through audio_callback(); gfx_frame() is called once per loop iteration.
//

// ########################################
// ## SETUP ###############################
// ########################################

//Input WAV file:
filename = "sound_files/Echos Flower.wav"
//filename = "audio.wav"

//Output MJPEG AVI video file:
//(while this stays commented out, the "avi_filename != 0" test below selects live playback)
//avi_filename = "wav_player.avi"

//Graphics options:
//(while these stay commented out, the xsize == 0 and fps == 0 fallbacks below apply)
//xsize = 1280 div 2
//ysize = 720 div 2
//fps = 25
//text = new( 2, 1, INT ) text[ 0 ] = "Some text 1" text[ 1 ] = "Some text 2"
//text = new( 2, 1, INT ) text[ 0 ] = "Piano in SunVox 1.9.2" text[ 1 ] = "Piano in SunVox 1.9.3"
text_scale = 2

//Sound options:
//Multiplier applied to the WAV sample rate for playback and for the exported audio track
sample_rate_scale = 1

// ########################################
// ## SETUP COMPLETE ######################
// ########################################

//Pixel size is derived from the window width and ss (480 unless xsize was set above)
if xsize == 0 { ss = 480 } else { ss = xsize }
set_pixel_size( WINDOW_XSIZE / ss )
resize( get_screen(), WINDOW_XSIZE, WINDOW_YSIZE )

//Apply the explicit screen size only when both dimensions were given,
//then re-read the real dimensions from the screen container
scr = get_screen()
if xsize != 0 && ysize != 0 { resize( scr, xsize, ysize ) }
xsize = get_xsize( scr )
ysize = get_ysize( scr )
hxsize = xsize / 2
hysize = ysize / 2

//Default frame rate
if fps == 0 { fps = 30 }

wav = load( filename )
logf( "WAV INFO:\n" )
logf( "  Sample Rate: %d\n", wav.sample_rate )
logf( "  Channels: %d\n", wav.channels )
logf( "  Loop Start (sample number): %d\n", wav.loop_start )
logf( "  Loop Length (number of samples): %d\n", wav.loop_len )
logf( "  Loop Type (0-none; 1-normal; 2-bidirectional): %d\n", wav.loop_type )
//wav_ptr is the current read position; audio_callback() advances it by frames * channels
wav_ptr = 0
wav_size = get_size( wav ) //container size; NOTE(review): compared against wav_ptr, which advances by frames * channels - confirm whether this is frames or interleaved samples
wav_channels = wav.channels
//wav_amp_max is the divisor gfx_frame() uses to scale samples;
//256 remains in effect for any sample type not listed below
wav_amp_max = 256
type = get_type( wav )
if type == INT16 { wav_amp_max = 1 << 15 }
if type == INT32 { wav_amp_max = 1 << 30 }
if type == FLOAT32 { wav_amp_max = 1 }

//MJPEG encoder library (provides the mjpeg_encoder_* functions used below)
include "../../lib/mjpeg.pixi"

if avi_filename != 0
{
    //Video export mode: vo is the flag tested by the main loop and by the cleanup code
    vo = 1
    vo_f = fopen( avi_filename, "wb" )
    if vo_f <= 0 { logf( "Can't open video file for writing\n" ) halt }
    vo_encoder = mjpeg_encoder_open(
	fps,
	xsize,
	ysize,
	90, //Quality
	wav_channels, //Audio channels
	wav.sample_rate * sample_rate_scale, //Audio frames per second
	get_type( wav ), //Audio sample type
	MJPEG_ENCODER_FLAG_USEINDEX | MJPEG_ENCODER_FLAG_HASSOUND, //Flags
	vo_f )
    vo_audio_buf_size = mjpeg_encoder_get_audio_size( vo_encoder ) //Number of frames per audio chunk
    //vo_audio_buf holds one interleaved chunk; vo_audio_ch_bufs holds one buffer per channel
    vo_audio_buf = new( vo_audio_buf_size * wav_channels, 1, get_type( wav ) )
    vo_audio_ch_bufs = new( wav_channels, 1, INT )
    i = 0 while i < wav_channels { vo_audio_ch_bufs[ i ] = new( vo_audio_buf_size, 1, get_type( wav ) ) i + 1 }
    logf( "Audio buffer size: %d frames\n", vo_audio_buf_size )
}
else
{
    //Live playback mode: the audio system calls audio_callback() to fetch the sound
    set_audio_callback( audio_callback, 0, wav.sample_rate * sample_rate_scale, get_type( wav ), wav_channels, AUDIO_FLAG_INTERP2 )

    rate1 = get_audio_sample_rate( 0 )
    rate2 = get_audio_sample_rate( 1 )
    logf( "Local (defined by the set_audio_callback()) sample rate: %d Hz\n", rate1 )
    logf( "Global (defined in the global Pixilang preferences) sample rate: %d Hz\n", rate2 )
    if rate1 != rate2
    {
	logf( "%d != %d, so resampling will be enabled\n", rate1, rate2 )
    }
}

gfx_init(

)
//Main loop: draw one frame, then either encode it (export) or show it (live)
while 1
{
    gfx_frame()
    if vo 
    {
	//Video export:
	//call the audio callback by hand to pull the next chunk into the per-channel buffers
	audio_callback( 0, 0, vo_audio_ch_bufs, vo_audio_buf_size, 0, -1, 0 )
	//interleave the per-channel buffers into vo_audio_buf (destination step = wav_channels)
	i = 0 while i < wav_channels 
	{
	    copy( vo_audio_buf, vo_audio_ch_bufs[ i ], i, 0, vo_audio_buf_size, wav_channels, 1 )
	    i + 1 
	}
	mjpeg_encoder_write_image( vo_encoder, scr )
        mjpeg_encoder_write_audio( vo_encoder, vo_audio_buf, 0, 0 )
	mjpeg_encoder_next_frame( vo_encoder )
	frame()
    }
    else
    {
	frame( 1000 / fps )
    }
    //Stop at the end of the file or when the user closes the window
    if wav_ptr >= wav_size { breakall }
    while( get_event() ) { if EVT[ EVT_TYPE ] == EVT_QUIT { breakall } }
}
gfx_deinit()
if vo
{
    //Close Video Export:
    mjpeg_encoder_close( vo_encoder )
    fclose( vo_f )
    i = 0 while i < wav_channels { remove( vo_audio_ch_bufs[ i ] ) i + 1 }
    remove( vo_audio_ch_bufs )
    remove( vo_audio_buf )
}

// Allocate the analysis buffers and set the FFT parameters used by gfx_frame().
// Must be called after the WAV has been loaded: the sample rate is taken from it.
fn gfx_init()
{
    vals = new( xsize, 1, FLOAT )
    clean( vals )

    ftsize = 1024          // FFT length in samples
    hftsize = ftsize / 2   // number of spectrum bins that gfx_frame() fills
    qftsize = ftsize / 4   // number of cepstrum bins that gfx_frame() draws

    bufre = new( ftsize, 1, FLOAT )   // FFT real part
    bufim = new( ftsize, 1, FLOAT )   // FFT imaginary part
    bufdb = new( ftsize, 1, FLOAT )   // spectrum in dB
    clean( bufdb )
    cepsre = new( hftsize, 1, FLOAT )
    cepsim = new( hftsize, 1, FLOAT )

    // Use the file's own sample rate so bin frequencies are correct for files
    // that are not 44.1 kHz; 44100 is kept only as a fallback.
    samplerate = wav.sample_rate
    if samplerate <= 0 { samplerate = 44100 }
}

// Release every container created by gfx_init().
fn gfx_deinit()
{
    remove( vals )
    remove( bufre )
    remove( bufim )
    remove( bufdb )
    remove( cepsre )
    remove( cepsim )
}

// Draw one frame: the dB spectrum of the ftsize samples starting at the current
// play position, on a pitch (semitone) scale, plus the experimental cepstrum.
fn gfx_frame()
{
    clear()

    // Take the play position once; the audio callback moves wav_ptr.
    $pos = wav_ptr

    // Smallest amplitude passed to log10(): silent bins become -180 dB
    // instead of -infinity (which turns into NaN when interpolated).
    $amp_floor = 0.000000001

    // read from wav (first channel only), windowed -------------------------
    $i = 0
    while $i < ftsize
    {
        $src = $pos + $i * wav_channels
        $v = 0
        // zero-pad explicitly when the window runs past the end of the file
        if $src < wav_size { $v = wav[ $src ] / wav_amp_max }
        $win = sin( M_PI * $i / ftsize ) // half-sine window
        bufre[ $i ] = $v * $win
        bufim[ $i ] = 0
        $i = $i + 1
    }

    fft( 0, bufim, bufre, ftsize )

    // calculate dB amplitude -------------------
    $i = 0
    while $i < hftsize
    {
        $v = sqrt( bufre[ $i ] * bufre[ $i ] + bufim[ $i ] * bufim[ $i ] ) // amplitude
        if $v < $amp_floor { $v = $amp_floor }
        $v = 20 * log10( $v ) // dB scale
        bufdb[ $i ] = $v

        // input for the optional cepstrum: windowed, weighted dB spectrum
        $win = sin( 0.5 * M_PI * $i / hftsize )
        $filt = pow( 0.5, log2( $i + 1 ) )
        cepsre[ $i ] = $filt * $win * $v / ( $i + 1 )
        cepsim[ $i ] = 0
        $i = $i + 1
    }

    // get cepstrum (same argument order as the first fft() call: im, then re)
    fft( 0, cepsim, cepsre, hftsize )

    // draw dB spectrum -----------------------------------------------------

    binwidth = samplerate / ftsize // Hz per FFT bin
    $pitch = 24 // loop on pitch scale, one column per semitone
    while $pitch < 135
    {
        $f = pow( 2, $pitch / 12 ) * 8.175799 // Hz
        $bin = $f / binwidth // fractional FFT index
        $idx = floor( $bin )
        // Only bins below hftsize hold data; skip pitches above that range
        // (this happens when the sample rate is lower than 44100).
        if $idx + 1 < hftsize
        {
            $frac = $bin - $idx
            $dB = ( 1 - $frac ) * bufdb[ $idx ] + $frac * bufdb[ $idx + 1 ]
            line( $pitch, 120, $pitch, -$dB, WHITE )
            //dot( $pitch, $dB, WHITE )
        }
        $pitch = $pitch + 1
    }

    // draw cepstrum, experimental ------------------------------------------

    $i = 1 // loop through cepstrum
    while $i < qftsize
    {
        $f = samplerate / $i
        $x = log2( $f / 8.175799 ) * 12 // pitch

        $v = sqrt( cepsre[ $i ] * cepsre[ $i ] + cepsim[ $i ] * cepsim[ $i ] ) // amplitude
        if $v < $amp_floor { $v = $amp_floor }
        $v = 20 * log10( $v ) // dB scale

        line( $x - 160, 120, $x - 160, -$v, BLUE )

        $i = $i + 1
    }
}

// Audio callback: copies the next $frames frames of the WAV into the
// per-channel output buffers in $channels and moves the play position on.
// Returns 0 once the play position has reached the end of the WAV, 1 otherwise.
fn audio_callback( $stream, $userdata, $channels, $frames, $output_time_in_system_ticks, $in_channels, $latency_in_frames )
{
    // Nothing left to play
    if wav_ptr >= wav_size { ret( 0 ) }

    // The WAV data is read with a step of wav_channels starting at
    // wav_ptr + channel number, i.e. one channel at a time
    $ch = 0
    while $ch < wav_channels
    {
        copy( $channels[ $ch ], wav, 0, wav_ptr + $ch, $frames, 1, wav_channels )
        $ch = $ch + 1
    }

    wav_ptr = wav_ptr + $frames * wav_channels
    ret( 1 )
}

and here's a video based on that, or rather the other way around: I stripped this code from my video code

this is really cool! also, i didn't know that you're silent broadcast! i followed you on soundcloud, and i really like your music

i'm not sure how i could integrate your code into mine, but maybe if you looked at my code you could come up with something?

i'll attach a zip to this post, with the necessary libs included. if you run the boot.pixi file (and change the wav file pointer in the code), you'll see a spectrum where the points on the spectrum are connected by lines. i'd like the area underneath these lines to be filled (not all the way to the bottom of the screen, just to the bottom of the spectrum)

hopefully i don't come off as being lazy, this is just a thing i have no clue how to implement with the way my code is now

thank you in advance!

ainegil · Post by **ainegil** » Sun Oct 09, 2022 4:47 am

Its 1 30 am here, so I wont look at this now, maybe tomorrow but cant promise.

Anyway, in my example look at the frame function, its nightradios template for loading sounds.

There ignore the few cepstrum related lines they turned out to be nonsense.

The white line drawing is the spectrum, its drawn line by line, and thats also whats needed in your example
If you want a fill and line on top you need to draw the top line with dots to make sure they match the fill.

Maybe I can explain my code tomorrow I think that makes more sense. Its not very difficult actually.

It just loops to draw one line next to the other, and looks up the endpoint in the values stored in bufdb.

Just look for the white line drawing and work backwards from there. Change the starting point of the lines to move and make the area smaller. Will explain it better tomorrow

AutumnCheney · Post by **AutumnCheney** » Sun Oct 09, 2022 6:36 am

ainegil wrote: ↑Sun Oct 09, 2022 4:47 am Its 1 30 am here, so I wont look at this now, maybe tomorrow but cant promise.

Anyway, in my example look at the frame function, its nightradios template for loading sounds.

There ignore the few cepstrum related lines they turned out to be nonsense.

The white line drawing is the spectrum, its drawn line by line, and thats also whats needed in your example
If you want a fill and line on top you need to draw the top line with dots to make sure they match the fill.

Maybe I can explain my code tomorrow I think that makes more sense. Its not very difficult actually.

It just loops to draw one line next to the other, and looks up the endpoint in the values stored in bufdb.

Just look for the white line drawing and work backwards from there. Change the starting point of the lines to move and make the area smaller. Will explain it better tomorrow

oh, now i get it! i implemented it in my demo code, and it's way better than the old one. i also added a function to cut out everything below the specified db, so i won't have inaudible stuff cluttering the spectrum, as well as scaling functions to position it the way i want

one more question: how do i smooth out the spectrum animation? it's very jittery right now, and i'd like it to be more interpolated, so to say

ainegil · Post by **ainegil** » Sun Oct 09, 2022 1:22 pm

limiting the dB range for display is necessary, I just didnt put that in to keep the example simple and general.

Smoothing depends on the reason for the jittering; it could be that the frame rate is too low

a way to smooth things is to add a smoothing lowpass filter or a peak detector.

for instance if you have the dB values in a container, you can use something like

// move the stored value toward newvalue by the fraction "smoothing"
oldvalue = container[ index]
container[index] = oldvalue + ( newvalue - oldvalue ) * smoothing

this is the same as
(1-smoothing) * oldvalue + smoothing * newvalue

however this also smoothes transients and makes it lag and miss fast changes.

so a peak detector does the same, but only

// read the stored value first, so the comparison uses this index's value
oldvalue = container[ index ]
if oldvalue > newvalue {
// falling: decay smoothly toward the new value
container[ index ] = oldvalue + ( newvalue - oldvalue ) * smoothing
} else {
// rising (or equal): follow the new value immediately
container[ index ] = newvalue
}

so this reacts fast on onsets but smoothes decays

AutumnCheney · Post by **AutumnCheney** » Mon Oct 10, 2022 12:07 am

ainegil wrote: ↑Sun Oct 09, 2022 1:22 pm limiting the dB range for display is necessary, I just didnt put that in to keep the example simple and general.

Smoothing depends on the reason for the jittering; it could be that the frame rate is too low

a way to smooth things is to add a smoothing lowpass filter or a peak detector.

for instance if you have the dB values in a container, you can use something like

// move the stored value toward newvalue by the fraction "smoothing"
oldvalue = container[ index]
container[index] = oldvalue + ( newvalue - oldvalue ) * smoothing

this is the same as
(1-smoothing) * oldvalue + smoothing * newvalue

however this also smoothes transients and makes it lag and miss fast changes.

so a peak detector does the same, but only

// read the stored value first, so the comparison uses this index's value
oldvalue = container[ index ]
if oldvalue > newvalue {
// falling: decay smoothly toward the new value
container[ index ] = oldvalue + ( newvalue - oldvalue ) * smoothing
} else {
// rising (or equal): follow the new value immediately
container[ index ] = newvalue
}

so this reacts fast on onsets but smoothes decays

i'm happy with the first method you described

now i can call my little program complete (for now...)!

WarmPlace.ru

trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread

Re: trying to make a wav/sunvox visualizer: a pixilang learner's thread