# speech_transcribe_diarization_beta.yaml

  1. type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
  2. schema_version: 1.2.0
  3. samples:
  4. - region_tag: speech_transcribe_diarization_beta
  5. title: Separating different speakers (Local File) (LRO) (Beta)
  6. description: |
  7. Print confidence level for individual words in a transcription of a short audio file
  8. Separating different speakers in an audio file recording
  9. rpc: LongRunningRecognize
  10. service: google.cloud.speech.v1p1beta1.Speech
  11. request:
  12. - field: audio.content
  13. value: "resources/commercial_mono.wav"
  14. input_parameter: local_file_path
  15. comment: Path to local audio file, e.g. /path/audio.wav
  16. value_is_file: true
  17. - field: config.enable_speaker_diarization
  18. value: true
  19. comment: |
  20. If enabled, each word in the first alternative of each result will be
  21. tagged with a speaker tag to identify the speaker.
  22. - field: config.diarization_speaker_count
  23. value: 2
  24. comment: Optional. Specifies the estimated number of speakers in the conversation.
  25. - field: config.language_code
  26. value: "en-US"
  27. comment: The language of the supplied audio
  28. response:
  29. - loop:
  30. collection: $resp.results
  31. variable: result
  32. body:
  33. - comment:
  34. - First alternative has words tagged with speakers
  35. - define: alternative = result.alternatives[0]
  36. - print:
  37. - "Transcript: %s"
  38. - alternative.transcript
  39. - comment:
  40. - Print the %s of each word
  41. - speaker_tag
  42. - loop:
  43. collection: alternative.words
  44. variable: word
  45. body:
  46. - print:
  47. - 'Word: %s'
  48. - word.word
  49. - print:
  50. - 'Speaker tag: %s'
  51. - word.speaker_tag