Skip to content

Instantly share code, notes, and snippets.

@ventouris
Created January 13, 2022 14:31
Show Gist options
  • Save ventouris/21bfe52fc0a09a265d66400ca0e16785 to your computer and use it in GitHub Desktop.
Save ventouris/21bfe52fc0a09a265d66400ca0e16785 to your computer and use it in GitHub Desktop.
Data Scientist skills
{
"success": true,
"message": "",
"result": {
"id": 3413,
"name": "data scientist",
"description": "People in this job find and interpret rich data sources, manage large amounts of data, merge data sources, ensure consistency of data-sets, and create visualisations to aid in understanding data. They build mathematical models using data, present and communicate data insights and findings to specialists and scientists in their team and if required, to a non-expert audience, and recommend ways to apply the data. They utilise recommendation engines, spam classifiers, sentiment analysers and classifiers for unstructured and semi-structured data.",
"status": "active",
"type": "platform",
"attributes": [],
"translations": {
"en": {
"name": "data scientist",
"description": "People in this job find and interpret rich data sources, manage large amounts of data, merge data sources, ensure consistency of data-sets, and create visualisations to aid in understanding data. They build mathematical models using data, present and communicate data insights and findings to specialists and scientists in their team and if required, to a non-expert audience, and recommend ways to apply the data. They utilise recommendation engines, spam classifiers, sentiment analysers and classifiers for unstructured and semi-structured data."
},
"fr": {
"name": "scientifique des données",
"description": "Les personnes exerçant cette profession trouvent et interprètent de riches sources de données, gèrent de grandes quantités de données, fusionnent des sources de données, assurent la cohérence des ensembles de données et créent des visualisations pour aider à comprendre les données. Elles construisent des modèles mathématiques en utilisant des données, présentent et communiquent les idées et les résultats des données aux spécialistes et aux scientifiques de leur équipe et, si nécessaire, à un public non-expert, et recommandent des façons d'appliquer les données. Elles utilisent des moteurs de recommandation, des classificateurs de spam, des analyseurs de sentiments et des classificateurs pour les données non structurées et semi-structurées."
}
},
"alternative_titles": [
{
"id": 2069,
"name": "data research scientist"
},
{
"id": 2070,
"name": "research data scientist"
},
{
"id": 2072,
"name": "data expert"
},
{
"id": 2073,
"name": "data engineer"
},
{
"id": 41804,
"name": "big data scientist"
},
{
"id": 57811,
"name": "machine learning engineer"
}
],
"essential_skills": [
{
"id": 26994,
"skilltype_id": 3,
"name": "Team spirit",
"description": "Coordinating with others / adjusting actions in relation to the others' actions.",
"automation_index": null,
"remote_index": null,
"is_knowledge": false,
"type": "platform",
"is_soft": true
},
{
"id": 26995,
"skilltype_id": 3,
"name": "Oral communication",
"description": "Talking to others to convey information effectively.",
"automation_index": null,
"remote_index": null,
"is_knowledge": false,
"type": "platform",
"is_soft": true
},
{
"id": 27016,
"skilltype_id": 3,
"name": "Written communication",
"description": "Communicating effectively in writing as appropriate for the needs of the audience.",
"automation_index": null,
"remote_index": null,
"is_knowledge": false,
"type": "platform",
"is_soft": true
},
{
"id": 26999,
"skilltype_id": 4,
"name": "Decision-making and judgement",
"description": "Considering the relative costs and benefits of potential actions in order to choose the most appropriate one.",
"automation_index": null,
"remote_index": null,
"is_knowledge": false,
"type": "platform",
"is_soft": true
},
{
"id": 28880,
"skilltype_id": 4,
"name": "Analytical skills",
"description": "Ability to analyze and research a problem or a topic, to decompose it into smaller pieces and develop an in-depth understanding of it.",
"automation_index": null,
"remote_index": null,
"is_knowledge": false,
"type": "platform",
"is_soft": true
},
{
"id": 26618,
"skilltype_id": 2,
"name": "develop data processing applications",
"description": "Create customised software for processing data by selecting and using the appropriate computer programming language in order for an ICT system to produce demanded output based on expected input.",
"automation_index": "35.10",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 17765,
"skilltype_id": 2,
"name": "build recommender systems",
"description": "Construct recommendation systems based on large data sets using programming languages or computer tools to create a subclass of information filtering system that seeks to predict the rating or preference a user gives to an item.",
"automation_index": "61.37",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 19994,
"skilltype_id": 2,
"name": "collect ICT data",
"description": "Gather data by designing and applying search and sampling methods.",
"automation_index": "41.70",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 17416,
"skilltype_id": 2,
"name": "handle data samples",
"description": "Collect and select a set of data from a population by a statistical or other defined procedure.",
"automation_index": "34.60",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 19544,
"skilltype_id": 2,
"name": "implement data quality processes",
"description": "Apply quality analysis, validation and verification techniques on data to check data quality integrity.",
"automation_index": "97.77",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 15964,
"skilltype_id": 2,
"name": "establish data processes",
"description": "Use ICT tools to apply mathematical, algorithmic or other data manipulation processes in order to create information.",
"automation_index": "18.85",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 13851,
"skilltype_id": 2,
"name": "normalise data",
"description": "Reduce data to their accurate core form (normal forms) in order to achieve such results as minimisation of dependency, elimination of redundancy, increase of consistency.",
"automation_index": "87.76",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 22844,
"skilltype_id": 2,
"name": "online analytical processing",
"description": "The online tools which analyse, aggregate and present multi-dimensional data enabling users to interactively and selectively extract and view data from specific points of view.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 22400,
"skilltype_id": 2,
"name": "manage data collection systems",
"description": "Develop and manage methods and strategies used to maximise data quality and statistical efficiency in the collection of data, in order to ensure the gathered data are optimised for further processing.",
"automation_index": "44.80",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 21365,
"skilltype_id": 2,
"name": "information categorisation",
"description": "The process of classifying the information into categories and showing relationships between the data for some clearly defined purposes.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 17788,
"skilltype_id": 2,
"name": "perform data cleansing",
"description": "Detect and correct corrupt records from data sets, ensure that the data become and remain structured according to guidelines.",
"automation_index": "61.88",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 15504,
"skilltype_id": 2,
"name": "data mining",
"description": "The methods of artificial intelligence, machine learning, statistics and databases used to extract content from a dataset.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 19111,
"skilltype_id": 2,
"name": "information extraction",
"description": "The techniques and methods used for eliciting and extracting information from unstructured or semi-structured digital documents and sources.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 19253,
"skilltype_id": 2,
"name": "design database scheme",
"description": "Draft a database scheme by following the Relational Database Management System (RDBMS) rules in order to create a logically arranged group of objects such as tables, columns and processes.",
"automation_index": "40.16",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 15102,
"skilltype_id": 2,
"name": "interpret current data",
"description": "Analyse data gathered from sources such as market data, scientific papers, customer requirements and questionnaires which are current and up-to-date in order to assess development and innovation in areas of expertise.",
"automation_index": "27.79",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 26920,
"skilltype_id": 2,
"name": "data models",
"description": "The techniques and existing systems used for structuring data elements and showing relationships between them, as well as methods for interpreting the data structures and relationships.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 16329,
"skilltype_id": 2,
"name": "visual presentation techniques",
"description": "The visual representation and interaction techniques, such as histograms, scatter plots, surface plots, tree maps and parallel coordinate plots, that can be used to present abstract numerical and non-numerical data, in order to reinforce the human understanding of this information.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 21854,
"skilltype_id": 2,
"name": "query languages",
"description": "The field of standardised computer languages for retrieval of information from a database and of documents containing the needed information.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 23892,
"skilltype_id": 2,
"name": "report analysis results",
"description": "Produce research documents or give presentations to report the results of a conducted research and analysis project, indicating the analysis procedures and methods which led to the results, as well as potential interpretations of the results.",
"automation_index": "34.60",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 20253,
"skilltype_id": 2,
"name": "statistics",
"description": "The study of statistical theory, methods and practices such as collection, organisation, analysis, interpretation and presentation of data. It deals with all aspects of data including the planning of data collection in terms of the design of surveys and experiments in order to forecast and plan work-related activities.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
}
],
"optional_skills": [
{
"id": 13653,
"skilltype_id": 2,
"name": "manage ICT data classification",
"description": "Oversee the classification system an organisation uses to organise its data. Assign an owner to each data concept or bulk of concepts and determine the value of each item of data.",
"automation_index": "41.19",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 17603,
"skilltype_id": 2,
"name": "manage ICT data architecture",
"description": "Oversee regulations and use ICT techniques to define the information systems architecture and to control data gathering, storing, consolidation, arrangement and usage in an organisation.",
"automation_index": "27.32",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 17251,
"skilltype_id": 2,
"name": "integrate ICT data",
"description": "Combine data from sources to provide a unified view of the set of these data.",
"automation_index": "61.88",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 17011,
"skilltype_id": 2,
"name": "perform data mining",
"description": "Explore large datasets to reveal patterns using statistics, database systems or artificial intelligence and present the information in a comprehensible way.",
"automation_index": "67.38",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 23814,
"skilltype_id": 2,
"name": "deliver visual presentation of data",
"description": "Create visual representations of data such as charts or diagrams for easier understanding.",
"automation_index": "34.60",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 23926,
"skilltype_id": 2,
"name": "unstructured data",
"description": "The information that is not arranged in a pre-defined manner or does not have a pre-defined data model and is difficult to understand and find patterns in without using techniques such as data mining.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 26734,
"skilltype_id": 2,
"name": "create data models",
"description": "Use specific techniques and methodologies to analyse the data requirements of an organisation's business processes in order to create models for these data, such as conceptual, logical and physical models. These models have a specific structure and format.",
"automation_index": "30.18",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 24283,
"skilltype_id": 2,
"name": "data quality assessment",
"description": "The process of revealing data issues using quality indicators, measures and metrics in order to plan data cleansing and data enrichment strategies according to data quality criteria.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 18022,
"skilltype_id": 2,
"name": "define data quality criteria",
"description": "Specify the criteria by which data quality is measured for business purposes, such as inconsistencies, incompleteness, usability for purpose and accuracy.",
"automation_index": "41.70",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 14547,
"skilltype_id": 2,
"name": "business intelligence",
"description": "The tools used to transform large amounts of raw data into relevant and helpful business information.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 26986,
"skilltype_id": 2,
"name": "natural language processing",
"description": "The technologies which enable ICT devices to understand and interact with users through human language.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 21996,
"skilltype_id": 2,
"name": "manage data",
"description": "Administer all types of data resources through their lifecycle by performing data profiling, parsing, standardisation, identity resolution, cleansing, enhancement and auditing. Ensure the data is fit for purpose, using specialised ICT tools to fulfil the data quality criteria.",
"automation_index": "41.19",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 21653,
"skilltype_id": 2,
"name": "resource description framework query language",
"description": "The query languages such as SPARQL which are used to retrieve and manipulate data stored in Resource Description Framework format (RDF).",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
},
{
"id": 16177,
"skilltype_id": 2,
"name": "execute analytical mathematical calculations",
"description": "Apply mathematical methods and make use of calculation technologies in order to perform analyses and devise solutions to specific problems.",
"automation_index": "34.10",
"remote_index": "1.00",
"is_knowledge": false,
"type": "platform",
"is_soft": false
},
{
"id": 15278,
"skilltype_id": 2,
"name": "computer programming",
"description": "The techniques and principles of software development, such as analysis, algorithms, coding, testing and compiling of programming paradigms (e.g. object oriented programming, functional programming) and of programming languages.",
"automation_index": null,
"remote_index": null,
"is_knowledge": true,
"type": "platform",
"is_soft": false
}
],
"client_job_properties": []
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment