@inproceedings{patil2012interspeech,
abstract = {How do we understand and interpret complex auditory environments in a way that may depend on some stated goals or intentions? Here, we propose a framework that provides a detailed analysis of the spectrotemporal modulations in the acoustic signal, augmented with a discriminative classifier using multilayer perceptrons. We show that such a representation is successful at capturing the non-trivial commonalities within a sound class and the differences between classes. It not only surpasses the performance of current systems in the literature by about 21{\%}, but also proves quite robust for processing multi-source cases. In addition, we test the role of feature re-weighting in improving feature selectivity and the signal-to-noise ratio in the direction of a sound class of interest.},
author = {Patil, Kailash and Elhilali, Mounya},
booktitle = {Proceedings of the 13th Annual Conference of the International Speech Communication Association (INTERSPEECH 2012)},
isbn = {9550091023},
keywords = {Acoustic event recognition,Attention,Bottom-up,Scene understanding,Top-down},
pages = {2510--2513},
title = {{Goal-Oriented Auditory Scene Recognition}},
url = {http://www.isca-speech.org/iscaweb/},
volume = {3},
year = {2012}
}