Ok, looks like the Jarvis OS is taking its time. I set out to do a project with the Arduino at home.
I give it a verbal instruction and I get a verbal answer. If it's a specific instruction for some usual activity around the house, like switching off the lights or changing the TV channel, the Arduino does it.
Just some quick Python code, but trying not to reinvent anything (making use of a lot of 3rd-party APIs).
Procedure:
-Give a voice command
-Translate the speech to text using google's speech recognition engine.
-Query a database for intelligent answer to my query[Wolfram Alpha]
-For specific command, arduino responds appropriately; else:
-Turn text back to speech and reply to command.
I intend to use TrueKnowledge alongside Wolfram Alpha so that when one fails to answer, the other covers for it — at least until TrueKnowledge migrates fully to evi.com.
Below is a really quick script which might have a lot of errors, because I'm super excited about my first Arduino server in C++, which I'm still sweating over. Sweat with it as you may, but I intend to code it closer to the bone so as to get more control than all these many APIs allow.
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
Simple AI program(JARVIS) that uses the google Api for speech recognition(speech to text),
and also for text to speech, Wolframalpha for answers to the queries
Copyright (c) 2014 (30/01/2014), Kenjoe41.
License: I DON'T KNOW MUCH ABOUT LICENSES. Use as you may, edit as you may, it's a free world.
URL: http://evilzone.org
"""
import time

from json import loads
from os import system
from wave import open as open_audio

import gst  # gstreamer bindings, used to play the mp3 TTS stream
import pygst  # for playing mp3 stream
import wolframalpha
# fixed: the pyaudio constant is paInt16 (the original paInit16 does not exist)
from pyaudio import PyAudio, paInt16
from requests import *
class Jarvis:
def __init__(self, file="audio"):
self.rate = 8000 # Google accepts 8000 and 16000 bit rate
self.file = file
self.channel = 1
self.format = paInit16
self.chunk = 1024
self.app_id = 'xxxx-xxxxxx'#App_id got from wolframAlpha site after registration, it's free
#----------------------------------------------------------------#
# SPEECH TO TEXT THROUGH GOOGLE SPEECH RECOGNITION ENGINE #
#----------------------------------------------------------------#
def convert(self):
#converts the recorded wav file into flac which google accepts.
system("sox %s -t wav -r 800 -t flac %s.flac" % (self.file, self.file))
#Option 2: system("ffmpeg -i %s.m4a -vn -ac 1 -ar 16000 -acodec flac %s.flac" % (input_file, self.file)
def record(self, time):
#record and write to file the audio in wav then call convert() to flac
#records for specific time entered, google api accepts <=15 sec
audio = PyAudio()
stream = audio.open(format=self.format, channels=self.channels,
rate=self.rate, input=True,
frames_per_buffer=self.chunk)
print "RECORDING...."
frames=[]
for i in range(0, self.rate / self.chunk * time):
data = stream.read(self.chunk)
frames.append(data)
stream.stop_stream()
stream.close()
audio.terminate()
write_frames = open_audio(self.file, 'wb')
write_frames.setnchannels(self.channel)
write_frames.setsampwidth(audio.get_sample_size(self.format))
write_frames.setframerate(self.rate)
write_frames.writeframes(''.join(frames))
write_frames.close()
self.convert()
def speech_to_text(self, language):
#speech to text url~
stturl = "http://www.google.com/speech-api/v1/recognise?xjerr=1&client=chromium&lang=%s" % (language)
audio = open("%s.flac" % self.file, 'rb').read()
headers = {"Content-Type": "audio/x-flac; rate=8000", 'User-Agent':'Mozilla/5.0'}
r = requests.post(stturl, data=audio, headers=headers)
#print r.text
response = r.read()
phrase = loads(response)['hypotheses'][0]['utterance']
return phrase, response
#-------------------------------------------------------------------#
# WOLFRAM ALPHA API: QUESTION TO ANSWER - ACCEPTS:TEXT #
#-------------------------------------------------------------------#
def wolframalpha_query(self):
client = wolframapha.Client(self.app_id)
phrase, complete_response = self.speech_to_text('en-US')
res = client.query(phrase)
answer = next(res.results).text
return answer
#-------------------------------------------------------------------#
# GOOGLE API: TEXT TO SPEECH #
#-------------------------------------------------------------------#
def text_to_speech(self):
tts_string = '+'.join(self.wolframalpha_query)
#text to speech url~
ttsurl = 'http://translate.google.com/translate_tts?tl=en&q=' + tts_string
player = gst.element_factory_make("playbin", "player")
player.set_property('uri', ttsurl)
player.set_state(gst.STATE_PLAYING)
#requires a delay, if the py process closes before the mp3 has finished it will be cut off.
time.sleep(12)
#-------------------------------------------------------------------#
# TODO: Detect character count, as google limits it to 100 chars #
# break up the sentence, receive the separate audio files and #
# concatenate them in one.[Probably on the weekend.] #
#-------------------------------------------------------------------#