Products

SIGN UP LOG IN

Models / Duplicate Detection

Image Spam and Image Theft Prevention

Introduction

This guide will tell you how to prevent repeat submissions of images, detect spammy behaviors and detect image theft where users submit images taken from other users on your properties.

To do so, you will be creating an Image List to fingerprint all your user-generated images. When a user uploads a new image, you will then be able to check it against all fingerprints in the image list, and thus detect if this is a duplicate or near-duplicate image.

Here are the steps to set up and use an Image List:

  1. Create an Image List from your dashboard
  2. Add Images to the list (through the dashboard or through the API)
  3. Check uploaded images against the list

1. Create an Image List

Go to your Sightengine dashboard to create a new list.

Once you have created a new list, retrieve the list id (this is a string starting with ili_), as this will be useful to interact with your newly created list.

2. Add Images to the list

Any image that you want to track should be added to the list. You can do so either from your Sightengine dashboard or through the API.

Option A: Add an Image through the Dashboard

Go to your Sightengine dashboard and click on the list you created. You can now add images by clicking the "ADD IMAGE" button and manually uploading images.

Option B: Add an Image through the API

Here is the code to add an image to a list:


# Upload a local image as multipart form data and add it to the Image List {list_id}.
# Replace {list_id}, {api_user} and {api_secret} with your own values.
curl -X POST 'https://api.sightengine.com/1.0/check.json' \
    -F 'media=@/path/to/image.jpg' \
    -F 'add_to_list={list_id}' \
    -F 'api_user={api_user}' \
    -F 'api_secret={api_secret}'


# this example uses requests
import requests
import json

# form fields sent alongside the image; replace the {placeholders} with your own values
params = {
  'add_to_list': '{list_id}',
  'api_user': '{api_user}',
  'api_secret': '{api_secret}'
}

# open the image inside a with-block so the file handle is closed
# as soon as the upload has completed (or failed)
with open('/path/to/image.jpg', 'rb') as image:
  files = {'media': image}
  r = requests.post('https://api.sightengine.com/1.0/check.json', files=files, data=params)

# decode the JSON body of the API response
output = json.loads(r.text)


// Form fields for the request: the image file to upload, the Image List to add it to,
// and the API credentials. Replace the {placeholders} with your own values.
$params = array(
  'media' => new CurlFile('/path/to/image.jpg'),
  'add_to_list' => '{list_id}',
  'api_user' => '{api_user}',
  'api_secret' => '{api_secret}',
);

// this example uses cURL
$ch = curl_init('https://api.sightengine.com/1.0/check.json');
curl_setopt($ch, CURLOPT_POST, true);
// return the response body from curl_exec() instead of printing it
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
$response = curl_exec($ch);
curl_close($ch);

// decode the JSON response into an associative array
$output = json_decode($response, true);


// this example uses axios and form-data
const axios = require('axios');
const FormData = require('form-data');
const fs = require('fs');

// Build the multipart form: the image to upload, the Image List to add it to,
// and the API credentials. Replace the {placeholders} with your own values.
// Declared with const: assigning to an undeclared name would create an implicit
// global, and throws a ReferenceError in strict mode / ES modules.
const data = new FormData();
data.append('media', fs.createReadStream('/path/to/image.jpg'));
data.append('add_to_list', '{list_id}');
data.append('api_user', '{api_user}');
data.append('api_secret', '{api_secret}');

axios({
  method: 'post',
  url:'https://api.sightengine.com/1.0/check.json',
  data: data,
  // form-data generates the multipart Content-Type header, including the boundary
  headers: data.getHeaders()
})
.then(function (response) {
  // on success: handle response
  console.log(response.data);
})
.catch(function (error) {
  // handle error: log the API's error body when there is one,
  // otherwise the underlying error message
  if (error.response) console.log(error.response.data);
  else console.log(error.message);
});

The API will return a JSON response with the following structure:

            
            
{
  "status": "success",
  "request": {
    "id": "req_1SJJxJjUHnSVWreApx9fF",
    "timestamp": 1510153177.0043,
    "operations": 1
  },
  "media": {
    "id": "med_1SJDfFuLAFj34TlAMfksaA",
    "uri": "image.jpg"
  }
}
            
        

3. Check uploaded images against the list

Whenever someone uploads an image to your properties, you will want to check if the same image, or a near-duplicate, has already been uploaded. Here is the code to do so:


# Upload a local image as multipart form data and check it against the Image List {list_id}.
# Replace {list_id}, {api_user} and {api_secret} with your own values.
curl -X POST 'https://api.sightengine.com/1.0/check.json' \
    -F 'media=@/path/to/image.jpg' \
    -F 'lists={list_id}' \
    -F 'api_user={api_user}' \
    -F 'api_secret={api_secret}'


# this example uses requests
import requests
import json

# form fields sent alongside the image; replace the {placeholders} with your own values
params = {
  'lists': '{list_id}',
  'api_user': '{api_user}',
  'api_secret': '{api_secret}'
}

# open the image inside a with-block so the file handle is closed
# as soon as the upload has completed (or failed)
with open('/path/to/image.jpg', 'rb') as image:
  files = {'media': image}
  r = requests.post('https://api.sightengine.com/1.0/check.json', files=files, data=params)

# decode the JSON body of the API response
output = json.loads(r.text)


// Form fields for the request: the image file to upload, the Image List to check it against,
// and the API credentials. Replace the {placeholders} with your own values.
$params = array(
  'media' => new CurlFile('/path/to/image.jpg'),
  'lists' => '{list_id}',
  'api_user' => '{api_user}',
  'api_secret' => '{api_secret}',
);

// this example uses cURL
$ch = curl_init('https://api.sightengine.com/1.0/check.json');
curl_setopt($ch, CURLOPT_POST, true);
// return the response body from curl_exec() instead of printing it
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
$response = curl_exec($ch);
curl_close($ch);

// decode the JSON response into an associative array
$output = json_decode($response, true);


// this example uses axios and form-data
const axios = require('axios');
const FormData = require('form-data');
const fs = require('fs');

// Build the multipart form: the image to upload, the Image List to check it against,
// and the API credentials. Replace the {placeholders} with your own values.
// Declared with const: assigning to an undeclared name would create an implicit
// global, and throws a ReferenceError in strict mode / ES modules.
const data = new FormData();
data.append('media', fs.createReadStream('/path/to/image.jpg'));
data.append('lists', '{list_id}');
data.append('api_user', '{api_user}');
data.append('api_secret', '{api_secret}');

axios({
  method: 'post',
  url:'https://api.sightengine.com/1.0/check.json',
  data: data,
  // form-data generates the multipart Content-Type header, including the boundary
  headers: data.getHeaders()
})
.then(function (response) {
  // on success: handle response
  console.log(response.data);
})
.catch(function (error) {
  // handle error: log the API's error body when there is one,
  // otherwise the underlying error message
  if (error.response) console.log(error.response.data);
  else console.log(error.message);
});

The API will return a JSON response with the following structure:

            
            
{
  "status": "success",
  "request": {
    "id": "req_1SJJxJjUHnSVWreApx9fF",
    "timestamp": 1510153177.0043,
    "operations": 1
  },
  "similarity": [
    {
      "list": {
        "id": "{list_id}"
      },
      "matches": [
        {
          "id": "med_1SJDfFuLAFj34TlAMfksaA",
          "custom_id": null,
          "score": 0.99
        }
      ]
    }
  ],
  "media": {
    "id": "med_1SJJEFuLqeSedThQjhNoS",
    "uri": "image.jpg"
  }
}
            
        

If matches were found, they will be returned under the matches array. For each match, the API will return the following information:

  • the id of the original image, as defined by Sightengine when the original image was added to the list.
  • the custom_id that you set for the original image. This is an optional field and will be null if no custom id was provided.
  • the score of the match. Scores are values between 0 and 1. The higher the score, the likelier this is to be a duplicate. You can usually assume that any match with a score above 0.5 is a duplicate.

Once an image has been approved and added to your properties, do not forget to add it to the Image List, to prevent the same image from being submitted again.

Was this page helpful?

Cookies help us deliver our services. By using our services, you agree to our use of cookies. Learn more

OK