Vi_channel_open_ex bug

hi,i have check vi_channel_open_ex function,i found that vi_channel_power_on_vi_device used chan->capture_data to register into client_list but chan->capture_data is null initialed in vi_capture_init ,i doudt whether this place is a bug,below is the code:

struct tegra_vi_channel *vi_channel_open_ex(unsigned channel, bool is_mem_pinned)
{

struct tegra_vi_channel *chan;
struct vi_channel_drv *chan_drv;
int err;

if (mutex_lock_interruptible(&chdrv_lock))
	return ERR_PTR(-ERESTARTSYS);

chan_drv = chdrv_;

if (chan_drv == NULL || channel >= chan_drv->num_channels) {
	mutex_unlock(&chdrv_lock);
	return ERR_PTR(-ENODEV);
}
mutex_unlock(&chdrv_lock);

chan = kzalloc(sizeof(*chan), GFP_KERNEL);
if (unlikely(chan == NULL))
	return ERR_PTR(-ENOMEM);

chan->drv = chan_drv;
chan->dev = chan_drv->dev;
chan->ndev = chan_drv->ndev;
chan->ops = chan_drv->ops;

err = vi_channel_power_on_vi_device(chan);
if (err < 0)
	goto error;

err = vi_capture_init(chan, is_mem_pinned);
if (err < 0)
	goto error;

mutex_lock(&chan_drv->lock);
if (rcu_access_pointer(chan_drv->channels[channel]) != NULL) {
	mutex_unlock(&chan_drv->lock);
	err = -EBUSY;
	goto rcu_err;
}
   ...

}

static int vi_channel_power_on_vi_device(struct tegra_vi_channel *chan)
{
int ret = 0;

dev_dbg(chan->dev, "vi_channel_power_on_vi_device\n");

ret = nvhost_module_add_client(chan->ndev, chan->capture_data);
if (ret < 0) {
	dev_err(chan->dev, "%s: failed to add vi client\n", __func__);
	return ret;
}

ret = nvhost_module_busy(chan->ndev);
if (ret < 0) {
	dev_err(chan->dev, "%s: failed to power on vi\n", __func__);
	return ret;
}

return 0;

}

int nvhost_module_add_client(struct platform_device *dev, void *priv)
{
struct nvhost_module_client *client;
struct nvhost_device_data *pdata = platform_get_drvdata(dev);

nvhost_dbg_fn("%s num_clks=%d priv=%p", dev->name,
	      pdata->num_clks, priv);

client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client) {
	nvhost_err(&dev->dev,
		   "failed to allocate client structure");
	return -ENOMEM;
}

INIT_LIST_HEAD(&client->node);
client->priv = priv;

mutex_lock(&client_list_lock);
list_add_tail(&client->node, &pdata->client_list);
mutex_unlock(&client_list_lock);

return 0;

}

int vi_capture_init(struct tegra_vi_channel *chan, bool is_mem_pinned)
{
struct vi_capture *capture;
struct device_node *dn;
struct platform_device *rtc_pdev;

dev_dbg(chan->dev, "%s++\n", __func__);
dn = of_find_node_by_path("tegra-camera-rtcpu");
if (of_device_is_available(dn) == 0) {
	dev_err(chan->dev, "failed to find rtcpu device node\n");
	return -ENODEV;
}
rtc_pdev = of_find_device_by_node(dn);
if (rtc_pdev == NULL) {
	dev_err(chan->dev, "failed to find rtcpu platform\n");
	return -ENODEV;
}

capture = kzalloc(sizeof(*capture), GFP_KERNEL);
if (unlikely(capture == NULL)) {
	dev_err(chan->dev, "failed to allocate capture channel\n");
	return -ENOMEM;
}

capture->rtcpu_dev = &rtc_pdev->dev;

init_completion(&capture->control_resp);
init_completion(&capture->capture_resp);

mutex_init(&capture->reset_lock);
mutex_init(&capture->control_msg_lock);
mutex_init(&capture->unpins_list_lock);

capture->vi_channel = chan;
chan->capture_data = capture;
chan->rtcpu_dev = capture->rtcpu_dev;

capture->is_mem_pinned = is_mem_pinned;
capture->channel_id = CAPTURE_CHANNEL_INVALID_ID;

capture->stream_id = NVCSI_STREAM_INVALID_ID;
capture->csi_port = NVCSI_PORT_UNSPECIFIED;
capture->virtual_channel_id = NVCSI_TPG_INVALID_ID;

return 0;

}

Below code should be assign the value.

capture->is_mem_pinned = is_mem_pinned;
capture->channel_id = CAPTURE_CHANNEL_INVALID_ID;

capture->stream_id = NVCSI_STREAM_INVALID_ID;
capture->csi_port = NVCSI_PORT_UNSPECIFIED;
capture->virtual_channel_id = NVCSI_TPG_INVALID_ID;

hi,ShaneCCC
function vi_channel_open_ex call vi_channel_power_on_vi_device,and vi_channel_power_on_vi_device well use chan->capture_data,but chan->capture_data is null at this time,it well be initialized in vi_capture_init,and vi_capture_init call after vi_channel_power_on_vi_device,so i’m doubt whether this code is a bug?
struct tegra_vi_channel *vi_channel_open_ex(unsigned channel, bool is_mem_pinned)
{
struct tegra_vi_channel *chan;
struct vi_channel_drv *chan_drv;
int err;

if (mutex_lock_interruptible(&chdrv_lock))
	return ERR_PTR(-ERESTARTSYS);

chan_drv = chdrv_;
if (chan_drv == NULL || channel >= chan_drv->num_channels) {
	mutex_unlock(&chdrv_lock);
	return ERR_PTR(-ENODEV);
}
mutex_unlock(&chdrv_lock);
chan = kzalloc(sizeof(*chan), GFP_KERNEL);
if (unlikely(chan == NULL))
	return ERR_PTR(-ENOMEM);

chan->drv = chan_drv;
chan->dev = chan_drv->dev;
chan->ndev = chan_drv->ndev;
chan->ops = chan_drv->ops;
err = vi_channel_power_on_vi_device(chan);
if (err < 0)
	goto error;
err = vi_capture_init(chan, is_mem_pinned);
if (err < 0)
	goto error;
mutex_lock(&chan_drv->lock);
if (rcu_access_pointer(chan_drv->channels[channel]) != NULL) {
	mutex_unlock(&chan_drv->lock);
	err = -EBUSY;
	goto rcu_err;
}
rcu_assign_pointer(chan_drv->channels[channel], chan);
mutex_unlock(&chan_drv->lock);

return chan;

rcu_err:
vi_channel_power_off_vi_device(chan);
vi_capture_shutdown(chan);
error:
kfree(chan);
return ERR_PTR(err);
}

My point is the chan->capture_data is a point and vi_channel_power_on_vi_device() and the nvhost_module_add_client() didn’t really access to it context to cause problem.

hi,ShaneCCC,
i don‘t think so,you can reference to remove function named nvhost_module_remove_client(just list below),in this function it will to found out registed nvhost_module_client and remove it. and if i have opened 2 or more camera sensors,it will be work abnormal。

int nvhost_module_add_client(struct platform_device *dev, void *priv)
{
struct nvhost_module_client *client;
struct nvhost_device_data *pdata = platform_get_drvdata(dev);

nvhost_dbg_fn("%s num_clks=%d priv=%p", dev->name,
	      pdata->num_clks, priv);

client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client) {
	nvhost_err(&dev->dev,
		   "failed to allocate client structure");
	return -ENOMEM;
}

INIT_LIST_HEAD(&client->node);
client->priv = priv;

mutex_lock(&client_list_lock);
list_add_tail(&client->node, &pdata->client_list);
mutex_unlock(&client_list_lock);

return 0;

}
EXPORT_SYMBOL(nvhost_module_add_client);

void nvhost_module_remove_client(struct platform_device *dev, void *priv)
{
int i;
struct nvhost_module_client *m;
int found = 0;
struct nvhost_device_data *pdata = platform_get_drvdata(dev);

nvhost_dbg_fn("%s priv=%p", dev->name, priv);

mutex_lock(&client_list_lock);
list_for_each_entry(m, &pdata->client_list, node) {
	if (priv == m->priv) {
		list_del(&m->node);
		found = 1;
		break;
	}
}
if (found) {
	kfree(m);
	for (i = 0; i < pdata->num_clks; i++)
		nvhost_module_update_rate(dev, i);
}
mutex_unlock(&client_list_lock);

}
EXPORT_SYMBOL(nvhost_module_remove_client);

If access the NULL point it will cause the kernel dump.
What’s problem did you meet?

no,just doubt about it.

but i have changed the order of these two function,and test ok.

    err = vi_capture_init(chan, is_mem_pinned);
if (err < 0)
	goto error;
err = vi_channel_power_on_vi_device(chan);
if (err < 0)
	goto error;

What do you mean working abnormal?
What’s your test command?

hi ShaneCCC,
i’m just doubt about it,because nvhost_module_remove_client will remove a nvhost_module_client based on the value of priv,if there are 2 or more nvhost_module_client which registed with priv NULL, so when remove,this function will remove a wrong nvhost_module_client,and nvhost_module_client will affect clk’s config value(in function nvhost_module_update_rate).

void nvhost_module_remove_client(struct platform_device *dev, void *priv)
{
int i;
struct nvhost_module_client *m;
int found = 0;
struct nvhost_device_data *pdata = platform_get_drvdata(dev);

nvhost_dbg_fn("%s priv=%p", dev->name, priv);

mutex_lock(&client_list_lock);
list_for_each_entry(m, &pdata->client_list, node) {
	if (priv == m->priv) {
		list_del(&m->node);
		found = 1;
		break;
	}
}
if (found) {
	kfree(m);
	for (i = 0; i < pdata->num_clks; i++)
		nvhost_module_update_rate(dev, i);
}
mutex_unlock(&client_list_lock);

}