@inproceedings{Bellur2020b,
  abstract  = {Attention plays a vital role in helping us navigate our acoustic surroundings. It guides sensory processing to sift through the cacophony of sounds in everyday scenes and modulates the representation of targets sounds relative to distractors. While its conceptual role is well established, there are competing theories as to how attentional feedback operates in the brain and how its mechanistic underpinnings can be incorporated into computational systems. These interpretations differ in the manner in which attentional feedback operates as an information bottleneck to aid perception. One interpretation is that attention adapts the sensory mapping itself to encode only the target cues. An alternative interpretation is that attention behaves as a gain modulator that enhances the target cues after they are encoded. Further, the theory of temporal coherence states that attention seeks to bind temporally coherent features relative to anchor features as determined by prior knowledge of target objects. In this work, we study these competing theories within a deep-network framework for the task of music source separation. We show that these theories complement each other, and when employed together, yield state of the art performance in music source separation. We further show that systems with attentional mechanisms can be made to scale to mismatched conditions by retuning only the attentional modules with minimal data.},
  author    = {Bellur, Ashwin and Elhilali, Mounya},
  % Acronyms braced so they survive any style that recases booktitle.
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  % Title Case with outer double braces removed: brace only the hyphenated
  % proper term, and let the bibliography style control overall casing.
  title     = {{Bio-Mimetic} Attentional Feedback in Music Source Separation},
  year      = {2020},
  % Month macro replaces the bogus auto-exported volume {2020-May};
  % a conference paper has no bound volume.
  month     = may,
  pages     = {8718--8722},
  % Bare DOI (no resolver prefix); the redundant ieeexplore.ieee.org url
  % is dropped since the DOI resolves to the same record.
  doi       = {10.1109/ICASSP40776.2020.9054552},
  isbn      = {978-1-5090-6631-5},
  % ISSN in canonical hyphenated form (was 15206149).
  issn      = {1520-6149},
  keywords  = {Attention,Bio-mimetic,Coherence,Feature tuning,Music source separation},
}